diff --git a/.editorconfig b/.editorconfig index 95549501..b6b31907 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,12 +8,9 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{yml,yaml}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 -[*.json] -insert_final_newline = unset - # These files are edited and tested upstream in nf-core/modules [/modules/nf-core/**] charset = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index b4bff9b6..17e6615b 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -15,8 +15,7 @@ Contributions to the code are even more welcome ;) If you'd like to write some code for nf-core/viralrecon, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/viralrecon issues](https://github.com/nf-core/viralrecon/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. Check that there isn't already an issue about your idea in the [nf-core/viralrecon issues](https://github.com/nf-core/viralrecon/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/viralrecon repository](https://github.com/nf-core/viralrecon) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) 4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). @@ -49,9 +48,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). -* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from `patch` to directly address this particular bug. ## Getting help @@ -73,7 +72,7 @@ If you wish to contribute a new step, please use the following coding standards: 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. 8. If applicable, add a new test command in `.github/workflow/ci.yml`. -9. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. ### Default values @@ -92,8 +91,8 @@ The process resources can be passed on to the tool dynamically within the proces Please use the following naming schemes, to make it easy to understand what is going where.
-* initial process channel: `ch_output_from_<process>` -* intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` ### Nextflow version bumping diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index b5338dc4..c8de4b1e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,7 +2,6 @@ name: Bug report description: Report something that is broken or incorrect labels: bug body: - - type: markdown attributes: value: | diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b3799717..3ff7d7ac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,14 +10,13 @@ Remember that PRs should be made against the dev branch, unless you're preparing Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) --> - ## PR checklist - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. + - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) + - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated.
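To make the channel-naming scheme in the CONTRIBUTING.md hunk above concrete, here is a minimal, self-contained Nextflow DSL2 sketch; the `FASTQC` and `MULTIQC` processes are illustrative toy stand-ins, not modules quoted from this pipeline:

```nextflow
// Toy sketch of the nf-core channel naming convention; FASTQC and
// MULTIQC stand in for real pipeline modules.
nextflow.enable.dsl = 2

process FASTQC {
    input:
    path reads

    output:
    path "${reads}.txt"

    script:
    """
    echo qc > ${reads}.txt
    """
}

process MULTIQC {
    input:
    path reports

    script:
    """
    ls ${reports}
    """
}

workflow {
    ch_input = Channel.fromPath(params.input)

    FASTQC(ch_input)
    // Initial process channel: ch_output_from_<process>
    ch_output_from_fastqc = FASTQC.out

    // Intermediate/terminal channel: ch_<previousprocess>_for_<nextprocess>
    ch_fastqc_for_multiqc = ch_output_from_fastqc.collect()
    MULTIQC(ch_fastqc_for_multiqc)
}
```

Run with, for example, `nextflow run sketch.nf --input 'reads/*.fastq.gz'` to see both naming patterns in action.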
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index b9915119..5600fcf9 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -18,20 +18,14 @@ jobs: platform: ["illumina", "nanopore"] steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v2 - + uses: nf-core/tower-action@v3 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - pipeline: ${{ github.repository }} - revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/viralrecon/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}/platform_${{ matrix.platform }}" } profiles: test_full_${{ matrix.platform }},aws_tower - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index f775f1ab..a3ce4a0a 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -10,21 +10,16 @@ jobs: if: github.repository == 'nf-core/viralrecon' runs-on: ubuntu-latest steps: + # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v2 - + uses: nf-core/tower-action@v3 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - pipeline: ${{ github.repository }} - revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/viralrecon/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-test-${{ github.sha }}" } profiles: test,aws_tower - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index d501fff4..c585d9fe 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -15,7 +15,6 @@ jobs: run: | { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/viralrecon ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d3067a5..a47ef384 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,20 +14,20 @@ env: jobs: test: - name: Run workflow tests + name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }}" runs-on: ubuntu-latest strategy: matrix: # Nextflow versions include: # Test pipeline minimum Nextflow version - - NXF_VER: '21.10.3' - NXF_EDGE: '' + - NXF_VER: "21.10.3" + NXF_EDGE: "" # Test latest edge release of Nextflow - - NXF_VER: '' - NXF_EDGE: '1' + - NXF_VER: "" + NXF_EDGE: "1" steps: - name: Check out pipeline code uses: actions/checkout@v2 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..d1fb62d0 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 
+1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/viralrecon' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v2 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "::set-output name=result::pass" + else + echo "::set-output name=result::fail" + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 0c9c5554..77358dee 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,7 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. on: push: pull_request: @@ -8,42 +9,6 @@ on: types: [published] jobs: - Markdown: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint . - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## Markdown linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `markdownlint-cli` - * On Mac: `brew install markdownlint-cli` - * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) - * Fix the markdown errors - * Automatically: `markdownlint . --fix` - * Manually resolve anything left from `markdownlint .` - - Once you push these changes the test should pass, and you can hide this comment :+1: - - We highly recommend setting up markdownlint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! - - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false - EditorConfig: runs-on: ubuntu-latest steps: @@ -55,47 +20,24 @@ jobs: run: npm install -g editorconfig-checker - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - YAML: + Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v2 - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml" -o -name "*.yaml") -c ${GITHUB_WORKSPACE}/.yamllint.yml - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## YAML linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `yaml-lint` - * [Install `npm`](https://www.npmjs.com/get-npm) then [install `yaml-lint`](https://www.npmjs.com/package/yaml-lint) (`npm install -g yaml-lint`) - * Fix the markdown errors - * Run the test locally: `yamllint $(find . -type f -name "*.yml" -o -name "*.yaml") -c ./.yamllint.yml` - * Fix any reported errors in your YAML files + - uses: actions/checkout@v2 - Once you push these changes the test should pass, and you can hide this comment :+1: + - uses: actions/setup-node@v2 - We highly recommend setting up yaml-lint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install Prettier + run: npm install -g prettier - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code uses: actions/checkout@v2 @@ -106,10 +48,10 @@ jobs: wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v3 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.6" + architecture: "x64" - name: Install dependencies run: | diff --git a/.gitpod.yml b/.gitpod.yml index b7d4cee1..e034a61d 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -2,13 +2,13 @@ image: nfcore/gitpod:latest vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Code formatter using Prettier + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.markdownlint.yml b/.markdownlint.yml deleted file mode 100644 index e7fc97a7..00000000 --- a/.markdownlint.yml +++ /dev/null @@ -1,16 +0,0 @@ -# Markdownlint configuration file -default: true -line-length: false -ul-indent: - indent: 4 -no-duplicate-header: - siblings_only: true -no-inline-html: - allowed_elements: - - img - - p - - kbd - - details - - summary -single-title: - level: 2 diff --git a/.nf-core.yml b/.nf-core.yml index 66b7be11..192f7e02 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,12 +1,10 @@ +repository_type: pipeline lint: files_unchanged: - - .markdownlint.yml - assets/email_template.html - assets/email_template.txt - lib/NfcoreTemplate.groovy - - .github/ISSUE_TEMPLATE/bug_report.yml - - .github/PULL_REQUEST_TEMPLATE.md - - .github/workflows/branch.yml - - .github/workflows/linting_comment.yml - - .github/workflows/linting.yml - - lib/NfcoreSchema.groovy + files_exist: + - assets/multiqc_config.yml + - conf/igenomes.config + - lib/WorkflowViralrecon.groovy diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..d0e7ae58 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,9 @@ +email_template.html +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644
index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/.yamllint.yml b/.yamllint.yml deleted file mode 100644 index 6889fa34..00000000 --- a/.yamllint.yml +++ /dev/null @@ -1,5 +0,0 @@ -extends: default - -rules: - document-start: disable - line-length: disable diff --git a/CHANGELOG.md b/CHANGELOG.md index ff1122c8..a75796a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,25 +3,66 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[2.5](https://github.com/nf-core/viralrecon/releases/tag/2.5)] - 2022-07-13 + +### Enhancements & fixes + +- Default Nextclade dataset shipped with the pipeline has been bumped from `2022-01-18T12:00:00Z` -> `2022-06-14T12:00:00Z` +- [[#234](https://github.com/nf-core/viralrecon/issues/234)] - Remove replacement of dashes in sample name with underscores +- [[#292](https://github.com/nf-core/viralrecon/issues/292)] - Filter empty FastQ files after adapter trimming +- [[#303](https://github.com/nf-core/viralrecon/pull/303)] - New pangolin dbs (4.0.x) not assigning lineages to SARS-CoV-2 samples in MultiQC report correctly +- [[#304](https://github.com/nf-core/viralrecon/pull/304)] - Re-factor code of `ivar_variants_to_vcf` script +- [[#306](https://github.com/nf-core/viralrecon/issues/306)] - Add contig field information in VCF header in `ivar_variants_to_vcf` and use `bcftools sort` +- [[#311](https://github.com/nf-core/viralrecon/issues/311)] - Invalid declaration `val medaka_model_string` +- [[#316](https://github.com/nf-core/viralrecon/issues/316)] - Variant calling isn't run when using `--skip_asciigenome` with metagenomic data +- [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer +- Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1) + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | | ----------- | ----------- | ----------- | | `artic` | 1.2.1 | 1.2.2 | | `bcftools` | 1.14 | 1.15.1 | | `multiqc` | 1.11 | 1.13a | | `nanoplot` | 1.39.0 | 1.40.0 | | `nextclade` | 1.10.2 | 2.2.0 | | `pangolin` | 3.1.20 | 4.1.1 | | `picard` | 2.26.10 | 2.27.4 | | `quast` | 5.0.2 | 5.2.0 | | `samtools` | 1.14 | 1.15.1 | | `spades` | 3.15.3 | 3.15.4 | | `vcflib` | 1.0.2 | 1.0.3 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. > +> **NB:** Dependency has been **added** if just the new version information is present. > +> **NB:** Dependency has been **removed** if new version information isn't present.
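As the Biocontainers note above explains, each DSL2 module runs in its own container, so a single tool's version can be overridden without modifying the pipeline itself (a later changelog entry below mentions docs for exactly this: overwriting default container definitions, e.g. for Pangolin). Here is a minimal sketch of a custom config supplied via `-c`; the `withName: 'PANGOLIN'` selector and the container tag are illustrative assumptions, not values quoted from the pipeline's own configs:

```nextflow
// custom.config: hypothetical per-module container override.
// The PANGOLIN selector and the tag below are illustrative assumptions.
process {
    withName: 'PANGOLIN' {
        container = 'quay.io/biocontainers/pangolin:4.1.1--pyhdfd78af_0'
    }
}
```

It would then be applied at runtime with something like `nextflow run nf-core/viralrecon -profile test,docker --outdir results -c custom.config`.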
+ +### Parameters + ## [[2.4.1](https://github.com/nf-core/viralrecon/releases/tag/2.4.1)] - 2022-03-01 ### Enhancements & fixes -* [[#288](https://github.com/nf-core/viralrecon/issues/288)] - `--primer_set_version` only accepts Integers (incompatible with "4.1" Artic primers set) +- [[#288](https://github.com/nf-core/viralrecon/issues/288)] - `--primer_set_version` only accepts Integers (incompatible with "4.1" Artic primers set) ## [[2.4](https://github.com/nf-core/viralrecon/releases/tag/2.4)] - 2022-02-22 ### Enhancements & fixes -* [nf-core/tools#1415](https://github.com/nf-core/tools/issues/1415) - Make `--outdir` a mandatory parameter -* [[#281](https://github.com/nf-core/viralrecon/issues/281)] - Nanopore medaka processing fails with error if model name, not model file, provided -* [[#286](https://github.com/nf-core/viralrecon/issues/286)] - IVAR_VARIANTS silently failing when FAI index is missing +- [nf-core/tools#1415](https://github.com/nf-core/tools/issues/1415) - Make `--outdir` a mandatory parameter +- [[#281](https://github.com/nf-core/viralrecon/issues/281)] - Nanopore medaka processing fails with error if model name, not model file, provided +- [[#286](https://github.com/nf-core/viralrecon/issues/286)] - IVAR_VARIANTS silently failing when FAI index is missing ### Parameters -| Old parameter | New parameter | -|-------------------------------|---------------------------------------| -| | `--publish_dir_mode` | +| Old parameter | New parameter | +| ------------- | -------------------- | +| | `--publish_dir_mode` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > @@ -33,49 +74,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes -* [[#277](https://github.com/nf-core/viralrecon/issues/277)] - Misuse of rstrip in make_variants_long_table.py script +- [[#277](https://github.com/nf-core/viralrecon/issues/277)] - Misuse of rstrip in make_variants_long_table.py script ### Software dependencies -| Dependency | Old version | New version | -|-------------------------------|-------------|-------------| -| `mosdepth` | 0.3.2 | 0.3.3 | -| `pangolin` | 3.1.19 | 3.1.20 | +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `mosdepth` | 0.3.2 | 0.3.3 | +| `pangolin` | 3.1.19 | 3.1.20 | ## [[2.3](https://github.com/nf-core/viralrecon/releases/tag/2.3)] - 2022-02-04 ### :warning: Major enhancements -* Please see [Major updates in v2.3](https://github.com/nf-core/viralrecon/issues/271) for a more detailed list of changes added in this version. -* When using `--protocol amplicon`, in the previous release, iVar was used for both the variant calling and consensus sequence generation. The pipeline will now perform the variant calling and consensus sequence generation with iVar and BCFTools/BEDTools, respectively. -* Bump minimum Nextflow version from `21.04.0` -> `21.10.3` +- Please see [Major updates in v2.3](https://github.com/nf-core/viralrecon/issues/271) for a more detailed list of changes added in this version. +- When using `--protocol amplicon`, in the previous release, iVar was used for both the variant calling and consensus sequence generation. The pipeline will now perform the variant calling and consensus sequence generation with iVar and BCFTools/BEDTools, respectively. 
+- Bump minimum Nextflow version from `21.04.0` -> `21.10.3` ### Enhancements & fixes -* Port pipeline to the updated Nextflow DSL2 syntax adopted on nf-core/modules -* Updated pipeline template to [nf-core/tools 2.2](https://github.com/nf-core/tools/releases/tag/2.2) -* [[#209](https://github.com/nf-core/viralrecon/issues/209)] - Check that contig in primer BED and genome fasta match -* [[#218](https://github.com/nf-core/viralrecon/issues/218)] - Support for compressed FastQ files for Nanopore data -* [[#232](https://github.com/nf-core/viralrecon/issues/232)] - Remove duplicate variants called by ARTIC ONT pipeline -* [[#235](https://github.com/nf-core/viralrecon/issues/235)] - Nextclade version bump -* [[#244](https://github.com/nf-core/viralrecon/issues/244)] - Fix BCFtools consensus generation and masking -* [[#245](https://github.com/nf-core/viralrecon/issues/245)] - Mpileup file as output -* [[#246](https://github.com/nf-core/viralrecon/issues/246)] - Option to generate consensus with BCFTools / BEDTools using iVar variants -* [[#247](https://github.com/nf-core/viralrecon/issues/247)] - Add strand-bias filtering option and codon fix in consecutive positions in ivar tsv conversion to vcf -* [[#248](https://github.com/nf-core/viralrecon/issues/248)] - New variants reporting table +- Port pipeline to the updated Nextflow DSL2 syntax adopted on nf-core/modules +- Updated pipeline template to [nf-core/tools 2.2](https://github.com/nf-core/tools/releases/tag/2.2) +- [[#209](https://github.com/nf-core/viralrecon/issues/209)] - Check that contig in primer BED and genome fasta match +- [[#218](https://github.com/nf-core/viralrecon/issues/218)] - Support for compressed FastQ files for Nanopore data +- [[#232](https://github.com/nf-core/viralrecon/issues/232)] - Remove duplicate variants called by ARTIC ONT pipeline +- [[#235](https://github.com/nf-core/viralrecon/issues/235)] - Nextclade version bump +- [[#244](https://github.com/nf-core/viralrecon/issues/244)] - Fix BCFtools consensus generation and masking +- [[#245](https://github.com/nf-core/viralrecon/issues/245)] - Mpileup file as output +- [[#246](https://github.com/nf-core/viralrecon/issues/246)] - Option to generate consensus with BCFTools / BEDTools using iVar variants +- [[#247](https://github.com/nf-core/viralrecon/issues/247)] - Add strand-bias filtering option and codon fix in consecutive positions in ivar tsv conversion to vcf +- [[#248](https://github.com/nf-core/viralrecon/issues/248)] - New variants reporting table ### Parameters -| Old parameter | New parameter | -|-------------------------------|---------------------------------------| -| | `--nextclade_dataset` | -| | `--nextclade_dataset_name` | -| | `--nextclade_dataset_reference` | -| | `--nextclade_dataset_tag` | -| | `--skip_consensus_plots` | -| | `--skip_variants_long_table` | -| | `--consensus_caller` | -| `--callers` | `--variant_caller` | +| Old parameter | New parameter | +| ------------- | ------------------------------- | +| | `--nextclade_dataset` | +| | `--nextclade_dataset_name` | +| | `--nextclade_dataset_reference` | +| | `--nextclade_dataset_tag` | +| | `--skip_consensus_plots` | +| | `--skip_variants_long_table` | +| | `--consensus_caller` | +| `--callers` | `--variant_caller` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. 
> @@ -87,25 +128,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. -| Dependency | Old version | New version | -|-------------------------------|-------------|-------------| -| `bcftools` | 1.11 | 1.14 | -| `blast` | 2.10.1 | 2.12.0 | -| `bowtie2` | 2.4.2 | 2.4.4 | -| `cutadapt` | 3.2 | 3.5 | -| `fastp` | 0.20.1 | 0.23.2 | -| `kraken2` | 2.1.1 | 2.1.2 | -| `minia` | 3.2.4 | 3.2.6 | -| `mosdepth` | 0.3.1 | 0.3.2 | -| `nanoplot` | 1.36.1 | 1.39.0 | -| `nextclade` | | 1.10.2 | -| `pangolin` | 3.1.7 | 3.1.19 | -| `picard` | 2.23.9 | 2.26.10 | -| `python` | 3.8.3 | 3.9.5 | -| `samtools` | 1.10 | 1.14 | -| `spades` | 3.15.2 | 3.15.3 | -| `tabix` | 0.2.6 | 1.11 | -| `vcflib` | | 1.0.2 | +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `bcftools` | 1.11 | 1.14 | +| `blast` | 2.10.1 | 2.12.0 | +| `bowtie2` | 2.4.2 | 2.4.4 | +| `cutadapt` | 3.2 | 3.5 | +| `fastp` | 0.20.1 | 0.23.2 | +| `kraken2` | 2.1.1 | 2.1.2 | +| `minia` | 3.2.4 | 3.2.6 | +| `mosdepth` | 0.3.1 | 0.3.2 | +| `nanoplot` | 1.36.1 | 1.39.0 | +| `nextclade` | | 1.10.2 | +| `pangolin` | 3.1.7 | 3.1.19 | +| `picard` | 2.23.9 | 2.26.10 | +| `python` | 3.8.3 | 3.9.5 | +| `samtools` | 1.10 | 1.14 | +| `spades` | 3.15.2 | 3.15.3 | +| `tabix` | 0.2.6 | 1.11 | +| `vcflib` | | 1.0.2 | > **NB:** Dependency has been **updated** if both old and new version information is present. > @@ -117,20 +158,20 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi ### Enhancements & fixes -* Updated pipeline template to [nf-core/tools 2.1](https://github.com/nf-core/tools/releases/tag/2.1) -* Remove custom content to render Pangolin report in MultiQC as it was officially added as a module in [v1.11](https://github.com/ewels/MultiQC/pull/1458) -* [[#212](https://github.com/nf-core/viralrecon/issues/212)] - Access to `PYCOQC.out` is undefined -* [[#229](https://github.com/nf-core/viralrecon/issues/229)] - ARTIC Guppyplex settings for 1200bp ARTIC primers with Nanopore data +- Updated pipeline template to [nf-core/tools 2.1](https://github.com/nf-core/tools/releases/tag/2.1) +- Remove custom content to render Pangolin report in MultiQC as it was officially added as a module in [v1.11](https://github.com/ewels/MultiQC/pull/1458) +- [[#212](https://github.com/nf-core/viralrecon/issues/212)] - Access to `PYCOQC.out` is undefined +- [[#229](https://github.com/nf-core/viralrecon/issues/229)] - ARTIC Guppyplex settings for 1200bp ARTIC primers with Nanopore data ### Software dependencies Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
-| Dependency | Old version | New version | -|-------------------------------|-------------|-------------| -| `multiqc` | 1.10.1 | 1.11 | -| `pangolin` | 3.0.5 | 3.1.7 | -| `samtools` | 1.10 | 1.12 | +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `multiqc` | 1.10.1 | 1.11 | +| `pangolin` | 3.0.5 | 3.1.7 | +| `samtools` | 1.10 | 1.12 | > **NB:** Dependency has been **updated** if both old and new version information is present. > @@ -142,25 +183,25 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi ### Enhancements & fixes -* Removed workflow to download data from public databases in favour of using [nf-core/fetchngs](https://nf-co.re/fetchngs) -* Added Pangolin results to MultiQC report -* Added warning to MultiQC report for samples that have no reads after adapter trimming -* Added docs about structure of data required for running Nanopore data -* Added docs about using other primer sets for Illumina data -* Added docs about overwriting default container definitions to use latest versions e.g. Pangolin -* Dashes and spaces in sample names will be converted to underscores to avoid issues when creating the summary metrics -* [[#196](https://github.com/nf-core/viralrecon/issues/196)] - Add mosdepth heatmap to MultiQC report -* [[#197](https://github.com/nf-core/viralrecon/issues/197)] - Output a .tsv comprising the Nextclade and Pangolin results for all samples processed -* [[#198](https://github.com/nf-core/viralrecon/issues/198)] - ASCIIGenome failing during analysis -* [[#201](https://github.com/nf-core/viralrecon/issues/201)] - Conditional include are not expected to work -* [[#204](https://github.com/nf-core/viralrecon/issues/204)] - Memory errors for SNP_EFF step +- Removed workflow to download data from public databases in favour of using [nf-core/fetchngs](https://nf-co.re/fetchngs) +- Added Pangolin results to MultiQC report +- Added warning to MultiQC report for samples that have no reads after adapter trimming +- Added docs about structure of data required for running Nanopore data +- Added docs about using other primer sets for Illumina data +- Added docs about overwriting default container definitions to use latest versions e.g. Pangolin +- Dashes and spaces in sample names will be converted to underscores to avoid issues when creating the summary metrics +- [[#196](https://github.com/nf-core/viralrecon/issues/196)] - Add mosdepth heatmap to MultiQC report +- [[#197](https://github.com/nf-core/viralrecon/issues/197)] - Output a .tsv comprising the Nextclade and Pangolin results for all samples processed +- [[#198](https://github.com/nf-core/viralrecon/issues/198)] - ASCIIGenome failing during analysis +- [[#201](https://github.com/nf-core/viralrecon/issues/201)] - Conditional include are not expected to work +- [[#204](https://github.com/nf-core/viralrecon/issues/204)] - Memory errors for SNP_EFF step ### Parameters -| Old parameter | New parameter | -|-------------------------------|---------------------------------------| -| `--public_data_ids` | | -| `--skip_sra_fastq_download` | | +| Old parameter | New parameter | +| --------------------------- | ------------- | +| `--public_data_ids` | | +| `--skip_sra_fastq_download` | | > **NB:** Parameter has been **updated** if both old and new parameter information is present. 
> @@ -172,10 +213,10 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. -| Dependency | Old version | New version | -|-------------------------------|-------------|-------------| -| `nextclade_js` | 0.14.2 | 0.14.4 | -| `pangolin` | 2.4.2 | 3.0.5 | +| Dependency | Old version | New version | +| -------------- | ----------- | ----------- | +| `nextclade_js` | 0.14.2 | 0.14.4 | +| `pangolin` | 2.4.2 | 3.0.5 | > **NB:** Dependency has been **updated** if both old and new version information is present. > @@ -187,36 +228,36 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi ### :warning: Major enhancements -* Pipeline has been re-implemented in [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) -* All software containers are now exclusively obtained from [Biocontainers](https://biocontainers.pro/#/registry) -* Updated minimum Nextflow version to `v21.04.0` (see [nextflow#572](https://github.com/nextflow-io/nextflow/issues/1964)) -* [BCFtools](http://samtools.github.io/bcftools/bcftools.html) and [iVar](https://github.com/andersen-lab/ivar) will be run by default for Illumina metagenomics and amplicon data, respectively. However, this behaviour can be customised with the `--callers` parameter. -* Variant graph processes to call variants relative to the reference genome directly from _de novo_ assemblies have been deprecated and removed -* Variant calling with Varscan 2 has been deprecated and removed due to [licensing restrictions](https://github.com/dkoboldt/varscan/issues/12) -* New tools: - * [Pangolin](https://github.com/cov-lineages/pangolin) for lineage analysis - * [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks - * [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks +- Pipeline has been re-implemented in [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) +- All software containers are now exclusively obtained from [Biocontainers](https://biocontainers.pro/#/registry) +- Updated minimum Nextflow version to `v21.04.0` (see [nextflow#572](https://github.com/nextflow-io/nextflow/issues/1964)) +- [BCFtools](http://samtools.github.io/bcftools/bcftools.html) and [iVar](https://github.com/andersen-lab/ivar) will be run by default for Illumina metagenomics and amplicon data, respectively. However, this behaviour can be customised with the `--callers` parameter. 
+- Variant graph processes to call variants relative to the reference genome directly from _de novo_ assemblies have been deprecated and removed +- Variant calling with Varscan 2 has been deprecated and removed due to [licensing restrictions](https://github.com/dkoboldt/varscan/issues/12) +- New tools: + - [Pangolin](https://github.com/cov-lineages/pangolin) for lineage analysis + - [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks + - [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks ### Other enhancements & fixes -* Illumina and Nanopore runs containing the same 48 samples sequenced on both platforms have been uploaded to the nf-core AWS account for full-sized tests on release -* Initial implementation of a standardised samplesheet JSON schema to use with user interfaces and for validation -* Default human `--kraken2_db` link has been changed from Zenodo to an AWS S3 bucket for more reliable downloads -* Updated pipeline template to nf-core/tools `1.14` -* Optimise MultiQC configuration and input files for faster run-time on huge sample numbers -* [[#122](https://github.com/nf-core/viralrecon/issues/122)] - Single SPAdes command to rule them all -* [[#138](https://github.com/nf-core/viralrecon/issues/138)] - Problem masking the consensus sequence -* [[#142](https://github.com/nf-core/viralrecon/issues/142)] - Unknown method invocation `toBytes` on String type -* [[#169](https://github.com/nf-core/viralrecon/issues/169)] - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers -* [[#170](https://github.com/nf-core/viralrecon/issues/170)] - ivar trimming of Swift libraries new offset feature -* [[#175](https://github.com/nf-core/viralrecon/issues/175)] - MultiQC report does not include all the metrics -* [[#188](https://github.com/nf-core/viralrecon/pull/188)] - Add and fix EditorConfig linting in entire pipeline +- Illumina and Nanopore runs containing the same 48 samples sequenced on both platforms have been uploaded to the nf-core AWS account for full-sized tests on release +- Initial implementation of a standardised samplesheet JSON schema to use with user interfaces and for validation +- Default human `--kraken2_db` link has been changed from Zenodo to an AWS S3 bucket for more reliable downloads +- Updated pipeline template to nf-core/tools `1.14` +- Optimise MultiQC configuration and input files for faster run-time on huge sample numbers +- [[#122](https://github.com/nf-core/viralrecon/issues/122)] - Single SPAdes command to rule them all +- [[#138](https://github.com/nf-core/viralrecon/issues/138)] - Problem masking the consensus sequence +- [[#142](https://github.com/nf-core/viralrecon/issues/142)] - Unknown method invocation `toBytes` on String type +- [[#169](https://github.com/nf-core/viralrecon/issues/169)] - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers +- [[#170](https://github.com/nf-core/viralrecon/issues/170)] - ivar trimming of Swift libraries new offset feature +- [[#175](https://github.com/nf-core/viralrecon/issues/175)] - MultiQC report does not include all the metrics +- [[#188](https://github.com/nf-core/viralrecon/pull/188)] - Add and fix EditorConfig linting in entire pipeline ### Parameters | Old parameter | New parameter | -|-------------------------------|---------------------------------------| +| ----------------------------- | ------------------------------------- 
| | `--amplicon_bed` | `--primer_bed` | | `--amplicon_fasta` | `--primer_fasta` | | `--amplicon_left_suffix` | `--primer_left_suffix` | @@ -292,7 +333,7 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. | Dependency | Old version | New version | -|-------------------------------|-------------|-------------| +| ----------------------------- | ----------- | ----------- | | `artic` | | 1.2.1 | | `asciigenome` | | 1.16.0 | | `bc` | 1.07.1 | | @@ -344,44 +385,44 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi ### Added -* [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot -* [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers -* [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub -* Parameters: - * `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads - * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter - * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output - * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` - * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` - * Unify parameter specification with COG-UK pipeline: - * `--min_allele_freq` - minimum allele frequency threshold for calling variants - * `--mpileup_depth` - SAMTools mpileup max per-file depth - * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` - * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming - * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass - * `--ivar_trim_window_width` - width of sliding window -* [#118] Updated GitHub Actions AWS workflow for small and full size tests. 
+- [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot +- [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers +- [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub +- Parameters: + - `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads + - `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter + - `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output + - `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` + - `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` + - Unify parameter specification with COG-UK pipeline: + - `--min_allele_freq` - minimum allele frequency threshold for calling variants + - `--mpileup_depth` - SAMTools mpileup max per-file depth + - `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` + - `--ivar_trim_min_len` - minimum length of read to retain after primer trimming + - `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass + - `--ivar_trim_window_width` - width of sliding window +- [#118] Updated GitHub Actions AWS workflow for small and full size tests. ### Removed -* `--skip_qc` parameter +- `--skip_qc` parameter ### Dependencies -* Add mosdepth `0.2.6` -* Add bioconductor-complexheatmap `2.2.0` -* Add bioconductor-biostrings `2.54.0` -* Add r-optparse `1.6.6` -* Add r-tidyr `1.1.0` -* Add r-tidyverse `1.3.0` -* Add r-ggplot2 `3.3.1` -* Add r-reshape2 `1.4.4` -* Add r-viridis `0.5.1` -* Update sra-tools `2.10.3` -> `2.10.7` -* Update bowtie2 `2.3.5.1` -> `2.4.1` -* Update picard `2.22.8` -> `2.23.0` -* Update minia `3.2.3` -> `3.2.4` -* Update plasmidid `1.5.2` -> `1.6.3` +- Add mosdepth `0.2.6` +- Add bioconductor-complexheatmap `2.2.0` +- Add bioconductor-biostrings `2.54.0` +- Add r-optparse `1.6.6` +- Add r-tidyr `1.1.0` +- Add r-tidyverse `1.3.0` +- Add r-ggplot2 `3.3.1` +- Add r-reshape2 `1.4.4` +- Add r-viridis `0.5.1` +- Update sra-tools `2.10.3` -> `2.10.7` +- Update bowtie2 `2.3.5.1` -> `2.4.1` +- Update picard `2.22.8` -> `2.23.0` +- Update minia `3.2.3` -> `3.2.4` +- Update plasmidid `1.5.2` -> `1.6.3` ## [[1.0.0](https://github.com/nf-core/viralrecon/releases/tag/1.0.0)] - 2020-06-01 @@ -396,22 +437,22 @@ This pipeline is a re-implementation of the [SARS_Cov2_consensus-nf](https://git 3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 4. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) 5. Variant calling - 1. Read alignment ([`Bowtie 2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) - 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); _amplicon data only_) - 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); _removal optional_) - 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 6. 
Choice of multiple variant calling and consensus sequence generation routes ([`VarScan 2`](http://dkoboldt.github.io/varscan/), [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/) _||_ [`iVar variants and consensus`](https://github.com/andersen-lab/ivar) _||_ [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/)) - * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) - * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + 1. Read alignment ([`Bowtie 2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); _amplicon data only_) + 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); _removal optional_) + 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 6. Choice of multiple variant calling and consensus sequence generation routes ([`VarScan 2`](http://dkoboldt.github.io/varscan/), [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/) _||_ [`iVar variants and consensus`](https://github.com/andersen-lab/ivar) _||_ [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/)) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) 6. _De novo_ assembly - 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) - 2. Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/)) - 3. Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`metaSPAdes`](http://cab.spbu.ru/software/meta-spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) - * Blast to reference genome ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch)) - * Contiguate assembly ([`ABACAS`](https://www.sanger.ac.uk/science/tools/pagit)) - * Assembly report ([`PlasmidID`](https://github.com/BU-ISCIII/plasmidID)) - * Assembly assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) - * Call variants relative to reference ([`Minimap2`](https://github.com/lh3/minimap2), [`seqwish`](https://github.com/ekg/seqwish), [`vg`](https://github.com/vgteam/vg), [`Bandage`](https://github.com/rrwick/Bandage)) - * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) + 2. Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/)) + 3. 
Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`metaSPAdes`](http://cab.spbu.ru/software/meta-spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) + - Blast to reference genome ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch)) + - Contiguate assembly ([`ABACAS`](https://www.sanger.ac.uk/science/tools/pagit)) + - Assembly report ([`PlasmidID`](https://github.com/BU-ISCIII/plasmidID)) + - Assembly assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + - Call variants relative to reference ([`Minimap2`](https://github.com/lh3/minimap2), [`seqwish`](https://github.com/ekg/seqwish), [`vg`](https://github.com/vgteam/vg), [`Bandage`](https://github.com/rrwick/Bandage)) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) 7. Present QC and visualisation for raw read, alignment, assembly and variant calling results ([`MultiQC`](http://multiqc.info/)) diff --git a/CITATIONS.md b/CITATIONS.md index ca45fbab..c467f2d7 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,102 +10,130 @@ ## Pipeline tools -* [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) - > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. +- [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) -* [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) - > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. + > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. -* [ARTIC network](https://github.com/artic-network) +- [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) -* [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) - > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PCMID: PMC4595904. + > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. -* [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) - > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. +- [ARTIC network](https://github.com/artic-network) -* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. 
PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. +- [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) -* [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) - > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. + > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PCMID: PMC4595904. -* [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) - > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. +- [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) -* [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) - > Marcel, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p. pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. + > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. -* [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) - > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. +- [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. -* [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) - > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. +- [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) -* [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) - > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. + > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. -* [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) - > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. 
+- [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) -* [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) - > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. + > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. -* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) -* [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) - > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. + > Martin, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, pp. 10-12, May 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. -* [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) - > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. +- [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) -* [pangolin](https://github.com/cov-lineages/pangolin) - > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. + > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. -* [picard-tools](http://broadinstitute.github.io/picard) +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -* [pycoQC](https://doi.org/10.21105/joss.01236) - > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. +- [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) -* [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) - > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. + > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7.
PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816.
+- [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/)
-* [R](https://www.R-project.org/)
-  > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria.
+  > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579.
-* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/)
-  > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002.
+- [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/)
-* [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/)
-  > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285.
+  > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682.
-* [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/)
-  > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048.
+- [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012)
-* [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/)
-  > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033.
+  > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PubMed PMID: 29096012; PubMed Central PMCID: PMC6030888.
-* [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/)
-  > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147.
-* [Vcflib](https://www.biorxiv.org/content/early/2021/05/23/2021.05.21.445151)
-  > Garrison E, Kronenberg ZN, Dawson ET, Pedersen BS, P Pjotr. Vcflib and tools for processing the VCF variant call format. bioRxiv 2021 May.doi: 10.1101/2021.05.21.445151.
+- [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/)
+
+  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report.
Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) + + > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. + +- [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) + + > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. + +- [pangolin](https://github.com/cov-lineages/pangolin) + + > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. + +- [picard-tools](http://broadinstitute.github.io/picard) + +- [pycoQC](https://doi.org/10.21105/joss.01236) + + > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. + +- [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) + + > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. + +- [R](https://www.R-project.org/) + + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +- [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) + + > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + +- [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) + + > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. + +- [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) + + > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. 
Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033.
+
+- [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/)
+
+  > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147.
+
+- [Vcflib](https://www.biorxiv.org/content/early/2021/05/23/2021.05.21.445151)
+  > Garrison E, Kronenberg ZN, Dawson ET, Pedersen BS, Prins P. Vcflib and tools for processing the VCF variant call format. bioRxiv 2021 May. doi: 10.1101/2021.05.21.445151.

 ## Software packaging/containerisation tools

-* [Anaconda](https://anaconda.com)
-  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
+- [Anaconda](https://anaconda.com)
+
+  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
+
+- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)
+
+  > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.
-* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)
-  > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.
+- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
-* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
-  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
+  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
-* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
+- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
-* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
-  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
+- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
+  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017.
PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index 6654c669..2b880174 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,18 @@ [![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3901628-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3901628) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3901628-1073c8)](https://doi.org/10.5281/zenodo.3901628) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/viralrecon) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/viralrecon) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?logo=slack)](https://nfcore.slack.com/channels/viralrecon) +[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) +[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -35,29 +36,29 @@ A number of improvements were made to the pipeline recently, mainly with regard 1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html)) 2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 3. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) -4. 
Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/); *optional*) +4. Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/); _optional_) 5. Variant calling - 1. Read alignment ([`Bowtie 2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) - 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); *amplicon data only*) - 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); *optional*) - 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) - 7. Choice of multiple variant callers ([`iVar variants`](https://github.com/andersen-lab/ivar); *default for amplicon data* *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html); *default for metagenomics data*) - * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) - * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) - 8. Choice of multiple consensus callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); *default for both amplicon and metagenomics data* *||* [`iVar consensus`](https://github.com/andersen-lab/ivar)) - * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) - * Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - * Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - 9. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) -6. *De novo* assembly - 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); *amplicon data only*) - 2. Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) *||* [`Unicycler`](https://github.com/rrwick/Unicycler) *||* [`minia`](https://github.com/GATB/minia)) - * Blast to reference genome ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch)) - * Contiguate assembly ([`ABACAS`](https://www.sanger.ac.uk/science/tools/pagit)) - * Assembly report ([`PlasmidID`](https://github.com/BU-ISCIII/plasmidID)) - * Assembly assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + 1. Read alignment ([`Bowtie 2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); _amplicon data only_) + 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); _optional_) + 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) + 7. 
Choice of multiple variant callers ([`iVar variants`](https://github.com/andersen-lab/ivar); _default for amplicon data_ _||_ [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html); _default for metagenomics data_) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + - Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + 8. Choice of multiple consensus callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); _default for both amplicon and metagenomics data_ _||_ [`iVar consensus`](https://github.com/andersen-lab/ivar)) + - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) + 9. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) +6. _De novo_ assembly + 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) + 2. Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) + - Blast to reference genome ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch)) + - Contiguate assembly ([`ABACAS`](https://www.sanger.ac.uk/science/tools/pagit)) + - Assembly report ([`PlasmidID`](https://github.com/BU-ISCIII/plasmidID)) + - Assembly assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) 7. Present QC and visualisation for raw read, alignment, assembly and variant calling results ([`MultiQC`](http://multiqc.info/)) ### Nanopore @@ -69,93 +70,94 @@ A number of improvements were made to the pipeline recently, mainly with regard 5. Remove unmapped reads and obtain alignment metrics ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) 7. 
Downstream variant analysis: - * Count metrics ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)) - * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) - * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) - * Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) - * Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) - * Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) - * Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + - Count metrics ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) + - Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + - Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) 8. Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results ([`MultiQC`](http://multiqc.info/)) ## Quick Start 1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility *(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))* +2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. 3. 
Download the pipeline and test it on a minimal dataset with a single command:

-   ```console
-   nextflow run nf-core/viralrecon -profile test,YOURPROFILE --outdir <OUTDIR>
-   ```
+   ```console
+   nextflow run nf-core/viralrecon -profile test,YOURPROFILE --outdir <OUTDIR>
+   ```

-   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
+   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.

-   > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
-   > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
-   > * If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
-   > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
+   > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
+   > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
+   > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
+   > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.

4. Start running your own analysis!
-  * Typical command for Illumina shotgun analysis:
-
-    ```bash
-    nextflow run nf-core/viralrecon \
-        --input samplesheet.csv \
-        --outdir <OUTDIR> \
-        --platform illumina \
-        --protocol metagenomic \
-        --genome 'MN908947.3' \
-        -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
-    ```
-
-  * Typical command for Illumina amplicon analysis:
-
-    ```bash
-    nextflow run nf-core/viralrecon \
-        --input samplesheet.csv \
-        --outdir <OUTDIR> \
-        --platform illumina \
-        --protocol amplicon \
-        --genome 'MN908947.3' \
-        --primer_set artic \
-        --primer_set_version 3 \
-        --skip_assembly \
-        -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
-    ```
-
-  * Typical command for Nanopore amplicon analysis:
-
-    ```bash
-    nextflow run nf-core/viralrecon \
-        --input samplesheet.csv \
-        --outdir <OUTDIR> \
-        --platform nanopore \
-        --genome 'MN908947.3' \
-        --primer_set_version 3 \
-        --fastq_dir fastq_pass/ \
-        --fast5_dir fast5_pass/ \
-        --sequencing_summary sequencing_summary.txt \
-        -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
-    ```
-
-  * An executable Python script called [`fastq_dir_to_samplesheet.py`](https://github.com/nf-core/viralrecon/blob/master/bin/fastq_dir_to_samplesheet.py) has been provided if you are using `--platform illumina` and would like to auto-create an input samplesheet based on a directory containing FastQ files **before** you run the pipeline (requires Python 3 installed locally) e.g.
-
-    ```console
-    wget -L https://raw.githubusercontent.com/nf-core/viralrecon/master/bin/fastq_dir_to_samplesheet.py
-    ./fastq_dir_to_samplesheet.py <FASTQ_DIR> samplesheet.csv
-    ```
-
-  * You can find the default keys used to specify `--genome` in the [genomes config file](https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config). This provides default options for
-    * Reference genomes (including SARS-CoV-2)
-    * Genome associates primer sets
-    * [Nextclade datasets](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html)
-
-    The Pangolin and Nextclade lineage and clade definitions change regularly as new SARS-CoV-2 lineages are discovered. For instructions to use more recent versions of lineage analysis tools like Pangolin and Nextclade please refer to the [updating containers](https://nf-co.re/viralrecon/usage#updating-containers) section in the usage docs.
-
-    Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard keys; see [usage docs](https://nf-co.re/viralrecon/usage#illumina-primer-sets).
+   - Typical command for Illumina shotgun analysis:
+
+     ```bash
+     nextflow run nf-core/viralrecon \
+         --input samplesheet.csv \
+         --outdir <OUTDIR> \
+         --platform illumina \
+         --protocol metagenomic \
+         --genome 'MN908947.3' \
+         -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+     ```
+
+   - Typical command for Illumina amplicon analysis:
+
+     ```bash
+     nextflow run nf-core/viralrecon \
+         --input samplesheet.csv \
+         --outdir <OUTDIR> \
+         --platform illumina \
+         --protocol amplicon \
+         --genome 'MN908947.3' \
+         --primer_set artic \
+         --primer_set_version 3 \
+         --skip_assembly \
+         -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+     ```
+
+   - Typical command for Nanopore amplicon analysis:
+
+     ```bash
+     nextflow run nf-core/viralrecon \
+         --input samplesheet.csv \
+         --outdir <OUTDIR> \
+         --platform nanopore \
+         --genome 'MN908947.3' \
+         --primer_set_version 3 \
+         --fastq_dir fastq_pass/ \
+         --fast5_dir fast5_pass/ \
+         --sequencing_summary sequencing_summary.txt \
+         -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+     ```
+
+   - An executable Python script called [`fastq_dir_to_samplesheet.py`](https://github.com/nf-core/viralrecon/blob/master/bin/fastq_dir_to_samplesheet.py) has been provided if you are using `--platform illumina` and would like to auto-create an input samplesheet based on a directory containing FastQ files **before** you run the pipeline (requires Python 3 installed locally) e.g.
+
+     ```console
+     wget -L https://raw.githubusercontent.com/nf-core/viralrecon/master/bin/fastq_dir_to_samplesheet.py
+     ./fastq_dir_to_samplesheet.py <FASTQ_DIR> samplesheet.csv
+     ```
+
+   - You can find the default keys used to specify `--genome` in the [genomes config file](https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config). This provides default options for:
+
+     - Reference genomes (including SARS-CoV-2)
+     - Genome associated primer sets
+     - [Nextclade datasets](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html)
+
+     The Pangolin and Nextclade lineage and clade definitions change regularly as new SARS-CoV-2 lineages are discovered. For instructions to use more recent versions of lineage analysis tools like Pangolin and Nextclade please refer to the [updating containers](https://nf-co.re/viralrecon/usage#updating-containers) section in the usage docs.
+
+     Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard keys; see [usage docs](https://nf-co.re/viralrecon/usage#illumina-primer-sets).

 ## Documentation

@@ -170,7 +172,7 @@ The key steps in the Nanopore implementation of the pipeline are carried out usi
 Many thanks to others who have helped out and contributed along the way too, including (but not limited to)\*:

 | Name                                                       | Affiliation                                                                             |
-|-----------------------------------------------------------|---------------------------------------------------------------------------------------|
+| --------------------------------------------------------- | ------------------------------------------------------------------------------------- |
 | [Aengus Stewart](https://github.com/stewarta)              | [The Francis Crick Institute, UK](https://www.crick.ac.uk/)                             |
 | [Alexander Peltzer](https://github.com/apeltzer)           | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/)                   |
 | [Alison Meynert](https://github.com/ameynert)              | [University of Edinburgh, Scotland](https://www.ed.ac.uk/)                              |
@@ -217,4 +219,4 @@ You can cite the `nf-core` publication as follows:

 >
 > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
>
-> *Nat Biotechnol.* 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
+> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
diff --git a/assets/email_template.html b/assets/email_template.html
index 5d19dcd9..1bd952cd 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -12,27 +12,124 @@
[HTML re-formatting hunk: the e-mail template markup is re-indented and re-wrapped; the rendered content is unchanged — the "nf-core/viralrecon v${version}" heading, "Run Name: $runName", the failure notice reporting $exitStatus and ${errorReport}, the warning listing samples skipped for failing the minimum mapped read threshold (< ${min_mapped_reads}), the success notice, the completion time $dateComplete and $duration, the launch $commandLine, the "Pipeline Configuration" summary table, and the link to https://github.com/nf-core/viralrecon.]
diff --git a/assets/multiqc_config_illumina.yaml b/assets/multiqc_config_illumina.yml
similarity index 99%
rename from assets/multiqc_config_illumina.yaml
rename to assets/multiqc_config_illumina.yml
index 3cff53aa..1979a953 100644
--- a/assets/multiqc_config_illumina.yaml
+++ b/assets/multiqc_config_illumina.yml
@@ -283,6 +283,7 @@ extra_fn_clean_exts:
   - ".markduplicates"
   - ".unclassified"
   - "_MN908947.3"
+  - " MN908947.3"
 extra_fn_clean_trim:
   - "Consensus_"
diff --git a/assets/multiqc_config_nanopore.yaml b/assets/multiqc_config_nanopore.yml
similarity index 100%
rename from assets/multiqc_config_nanopore.yaml
rename to assets/multiqc_config_nanopore.yml
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 9e9d343e..3d255e41 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -36,8 +36,6 @@
         "errorMessage": "Barcode must be provided and must be an integer"
       }
     },
-    "required": [
-      "sample"
-    ]
+    "required": ["sample"]
   }
 }
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index d1038f9a..261ddb95 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -99,11 +99,6 @@ def check_illumina_samplesheet(file_in, file_out):
                 f"WARNING: Spaces have been replaced by underscores for sample: {sample}"
             )
             sample = sample.replace(" ", "_")
-        if sample.find("-") != -1:
-            print(
-                f"WARNING: Dashes have been replaced by underscores for sample: {sample}"
-            )
-            sample = sample.replace("-", "_")
         if not sample:
             print_error("Sample entry has not been specified!", "Line", line)
@@ -279,4 +274,4 @@ def main(args=None):
 if __name__ == "__main__":
-    sys.exit(main())
+    sys.exit(main())
\ No newline at end of file
diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py
index d3f126ea..a900f502 100755
--- a/bin/ivar_variants_to_vcf.py
+++ b/bin/ivar_variants_to_vcf.py
@@ -5,7 +5,11 @@ import re
 import errno
 import argparse
+from collections import OrderedDict
+from collections import deque
+
 import numpy as np
+from Bio import SeqIO
 from scipy.stats import fisher_exact
@@ -34,367 +38,540 @@ def parse_args(args=None):
         "--ignore_strand_bias",
         default=False,
         help="Does not take strand bias into account, use this option when not using amplicon sequencing.",
-        action="store_true"
+        action="store_true",
     )
     parser.add_argument(
         "-ic",
         "--ignore_merge_codons",
         help="Output variants without taking into account if consecutive positions belong to the same codon.",
-        action="store_true"
+        action="store_true",
+    )
+    parser.add_argument(
+        "-f",
+        "--fasta",
+        type=str,
+        default=None,
+        help="Fasta file used in mapping and variant calling for vcf header reference genome length info.",
     )
-    return parser.parse_args(args)
+def make_dir(path):
+    """
+    Description:
+        Create directory if it doesn't exist.
+    Input:
+        path - path where the directory will be created.
+    Returns:
+        None
+    """
+    if not len(path) == 0:
+        try:
+            os.makedirs(path)
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise
+
+
+def parse_ivar_line(line):
+    """
+    Description:
+        Parse ivar line to get needed variables for vcf format.
+    input:
+        line - ivar tsv line
+    return:
+        CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON, ALT_CODON, pass_test, var_type
+    """
+    line = re.split("\t", line)
+
+    ## Assign initial fields to variables
+    CHROM = line[0]
+    POS = line[1]
+    ID = "."
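+    # For reference (an illustrative summary of iVar's documented variants TSV
+    # layout, not re-checked against the input file): column 0 is REGION, 1 POS,
+    # 2 REF, 3 ALT, 4 REF_DP, 5 REF_RV, 6 REF_QUAL, 7 ALT_DP, 8 ALT_RV,
+    # 9 ALT_QUAL, 10 ALT_FREQ, 11 TOTAL_DP, 13 PASS, 15 REF_CODON and
+    # 17 ALT_CODON -- these are the indices used by the assignments here.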
+    REF = line[2]
+    ALT = line[3]
+
+    ## REF/ALT depths and quals
+    REF_DP = int(line[4])
+    REF_RV = int(line[5])
+    REF_FW = REF_DP - REF_RV
+    REF_QUAL = int(line[6])
+    ALT_RV = int(line[8])
+    ALT_DP = int(line[7])
+    ALT_FW = ALT_DP - ALT_RV
+    ALT_QUAL = int(line[9])
+    ALT_FREQ = float(line[10])
+    FORMAT = [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ]
+
+    ## Codon annotation
+    REF_CODON = line[15]
+    ALT_CODON = line[17]
+
+    ## Determine variant type
+    var_type = "SNP"
+    if ALT[0] == "+":
+        ALT = REF + ALT[1:]
+        var_type = "INS"
+    elif ALT[0] == "-":
+        REF += ALT[1:]
+        ALT = line[2]
+        var_type = "DEL"
+
+    QUAL = "."
+
+    ## Determine FILTER field
+    INFO = f"DP={line[11]}"
+    pass_test = line[13]
+
+    return (
+        CHROM,
+        POS,
+        ID,
+        REF,
+        ALT,
+        QUAL,
+        INFO,
+        FORMAT,
+        REF_CODON,
+        ALT_CODON,
+        pass_test,
+        var_type,
+    )
+
+
+######################
+## FILTER FUNCTIONS ##
+######################
+
+
+def ivar_filter(pass_test):
+    """
+    Description:
+        process ivar filter into vcf filter format.
+    input:
+        pass_test - ivar fisher exact test ["TRUE", "FALSE"]
+    return:
+        Whether it passes the filter or not. [False, "ft"]
+    """
+    if pass_test == "TRUE":
+        return False
+    else:
+        return "ft"
+
+
+def strand_bias_filter(format):
+    """
+    Description:
+        Calculate strand-bias fisher test.
+    input:
+        format - format variables
+    return:
+        Whether it passes the filter or not. [False, "sb"]
+    """
+    # format=[REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ]
+    # table:
+    ## REF_FW  REF_RV
+    ## ALT_FW  ALT_RV
+    table = np.array(
+        [[format[0] - format[1], format[1]], [format[3] - format[4], format[4]]]
+    )
+    oddsr, pvalue = fisher_exact(table, alternative="greater")
+
+    # h0: both strands are equally represented.
+    # If the test is significant, h0 is rejected, so there is a strand bias.
+    if pvalue < 0.05:
+        return "sb"
+    else:
+        return False
+
+
+def write_vcf_header(ref, ignore_strand_bias, file_out, filename):
+    """
+    Description:
+        Write vcf header for VCFv4.2
+    input:
+        ref - (optional), ref in fasta format
+        ignore_strand_bias - if no strand-bias is calculated [True, False]
+        file_out - output file
+        filename - name of the output file
+    return:
+        Nothing.
+    """
+    ## Define VCF header
+    header_source = ["##fileformat=VCFv4.2", "##source=iVar"]
+    if ref:
+        header_contig = []
+        for record in SeqIO.parse(ref, "fasta"):
+            header_contig += [
+                "##contig=<ID=" + record.id + ",length=" + str(len(record.seq)) + ">"
+            ]
+
+        header_source += header_contig
+
+    header_info = ['##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">']
+    header_filter = [
+        '##FILTER=<ID=PASS,Description="All filters passed">',
+        '##FILTER=<ID=ft,Description="Fisher exact test of variant frequency compared to mean error rate, p-value > 0.05">',
+    ]
+    header_format = [
+        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
+        '##FORMAT=<ID=REF_DP,Number=1,Type=Integer,Description="Depth of reference base">',
+        '##FORMAT=<ID=REF_RV,Number=1,Type=Integer,Description="Depth of reference base on reverse reads">',
+        '##FORMAT=<ID=REF_QUAL,Number=1,Type=Integer,Description="Mean quality of reference base">',
+        '##FORMAT=<ID=ALT_DP,Number=1,Type=Integer,Description="Depth of alternate base">',
+        '##FORMAT=<ID=ALT_RV,Number=1,Type=Integer,Description="Depth of alternate base on reverse reads">',
+        '##FORMAT=<ID=ALT_QUAL,Number=1,Type=Integer,Description="Mean quality of alternate base">',
+        '##FORMAT=<ID=ALT_FREQ,Number=1,Type=Float,Description="Frequency of alternate base">',
+    ]
+    header_cols = [f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}"]
+    if not ignore_strand_bias:
+        header_filter += [
+            '##FILTER=<ID=sb,Description="Strand bias, Fisher exact test p-value < 0.05">'
+        ]
+
+    header = header_source + header_info + header_filter + header_format + header_cols
+    fout = open(file_out, "w")
+    fout.write("\n".join(header) + "\n")
+    fout.close()
+
+
+def write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, file_out):
+    """
+    Description:
+        Format variables into vcf line format and write line to file.
+    input:
+        chrom, pos, id, ref, alt, filter, qual, info, format - vcf variables
+        file_out - file output
+    return:
+        Nothing.
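+    Example (hypothetical values, shown only to illustrate the output shape):
+        write_vcf_line("MN908947.3", "3037", ".", "C", "T", "PASS", ".",
+                       "DP=467", [2, 0, 37, 465, 216, 37, 0.995], "out.vcf")
+        appends the tab-separated data line:
+        MN908947.3  3037  .  C  T  .  PASS  DP=467
+        GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ
+        1:2:0:37:465:216:37:0.995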
+    """
+    sample = f'1:{":".join(str(x) for x in format)}'
+    format = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ"
+
+    oline = (
+        chrom
+        + "\t"
+        + pos
+        + "\t"
+        + id
+        + "\t"
+        + ref
+        + "\t"
+        + alt
+        + "\t"
+        + qual
+        + "\t"
+        + filter
+        + "\t"
+        + info
+        + "\t"
+        + format
+        + "\t"
+        + sample
+        + "\n"
+    )
+    fout = open(file_out, "a")
+    fout.write(oline)
+    fout.close()
+
+
+############################
+## MERGE CODONS FUNCTIONS ##
+############################
+
+
 def check_consecutive(mylist):
-    '''
+    """
     Description:
-        This function checks if a list of three or two numbers are consecutive and returns how many items are consecutive.
+        This function checks a list of numbers and returns how many items are consecutive.
     input:
         my_list - A list of integers
     return:
-        Number of items consecutive in the list - [False, 1, 2]
-    '''
+        Number of items consecutive in the list - [False, 2, 3,..]
+    """
     my_list = list(map(int, mylist))
-    ## Check if the list contains consecutive numbers
-    if sorted(my_list) == list(range(min(my_list), max(my_list)+1)):
+    if len(my_list) == 1:
+        return False
+    elif sorted(my_list) == list(range(min(my_list), max(my_list) + 1)):
         return len(my_list)
     else:
         ## If not, and the list is > 1, remove the last item and reevaluate.
-        if len(my_list) > 1:
+        if len(my_list) > 2:
             my_list.pop()
-            if sorted(my_list) == list(range(min(my_list), max(my_list)+1)):
+            if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)):
                 return len(my_list)
             else:
                 return False
     return False


-def codon_position(seq1,seq2):
-    '''
+def get_diff_position(seq1, seq2):
+    """
     Description:
         Function to compare two codon nucleotide sequences (size 3) and retuns the position where it differs.
     Input:
-        seq1 - list size 3 [A,T,C,G]
-        seq2 - list size 3 [A,T,C,G]
+        seq1 - string size 3 [A,T,C,G]. Ex. "ATC"
+        seq2 - string size 3 [A,T,C,G]. Ex. "ACC"
     Returns:
         Returns position where seq1 != seq2
-    '''
+    """
     if seq1 == "NA":
         return False
     ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]]
     if len(ind_diff) > 1:
-        print("There has been an issue, more than one difference between the seqs.")
-        return False
+        print("There has been an issue, more than one difference between the seqs.")
+        return False
     else:
         return ind_diff[0]


-def rename_vars(dict_lines,num_collapse):
-    '''
+def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt):
+    """
+    Description:
+        Logic to determine whether variant lines need to be collapsed into one,
+        based on whether they are consecutive and belong to the same codon.
+    Input:
+        q_pos - list of positions. Ex. [4441, 4442, 4443]
+        fe_codon_ref - first position codon annotation for ref. Ex. "ATG"
+        fe_codon_alt - first position codon annotation for alt. Ex. "AGG"
+    Returns:
+        Returns num_collapse. Number of lines that need to be collapsed into one.
+    """
+    # Are two positions in the queue consecutive?
+    # q_pos = [4441, 4442, 5067]
+    num_collapse = 0
+    if check_consecutive(list(q_pos)) == 2:
+        ## If the first position is not at the third position of the codon, both variants are in the same codon.
+        if get_diff_position(fe_codon_ref, fe_codon_alt) != 2:
+            num_collapse = 2
+        else:
+            num_collapse = 1
+    # Are the three positions in the queue consecutive?
+    # q_pos = [4441, 4442, 4443]
+    elif check_consecutive(list(q_pos)) == 3:
+        ## Check which codon position the first variant falls on and process it accordingly.
+        # If the first position is in the first codon position, all three positions belong to the same codon.
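+        # Illustrative example: with q_pos = [4441, 4442, 4443] and the first
+        # variant annotated ref_codon "CTA" vs alt_codon "TTA", the codons differ
+        # at index 0, i.e. the first variant sits on codon base 1, so all three
+        # rows collapse into a single variant line.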
+        if get_diff_position(fe_codon_ref, fe_codon_alt) == 0:
+            num_collapse = 3
+        # If the first position is in the second codon position, the first two positions belong to the same codon and the last one is independent.
+        elif get_diff_position(fe_codon_ref, fe_codon_alt) == 1:
+            num_collapse = 2
+        ## Finally, if the first position is in the last codon position, we write the first position and leave the remaining two to be evaluated in the next iteration.
+        elif get_diff_position(fe_codon_ref, fe_codon_alt) == 2:
+            num_collapse = 1
+    # If not consecutive, process only one line.
+    elif check_consecutive(list(q_pos)) == False:
+        num_collapse = 1
+
+    return num_collapse
+
+
+def process_variants(variants, num_collapse):
+    """
     Description:
-        The function set the vars acordingly to the lines to collapse do to consecutive variants.
+        The function sets the variables according to the number of lines to collapse due to consecutive variants.
     Input:
-        dict_lines - Dict with var lines.
+        variants - Dict with var lines.
         num_collapse - number of lines to collapse [2,3]
     Returns::
-        Vars fixed.
-    '''
-    CHROM = dict_lines["CHROM"][0]
-    POS = dict_lines["POS"][0]
-    ID = dict_lines["ID"][0]
-    # If two consecutive collapse 2 lines into one.
-    if int(num_collapse) == 2:
-        REF = str(dict_lines["REF"][0]) + str(dict_lines["REF"][1])
-        ALT = str(dict_lines["ALT"][0]) + str(dict_lines["ALT"][1])
-    # If three consecutive collapse 3 lines into one.
-    elif int(num_collapse) == 3:
-        REF = str(dict_lines["REF"][0]) + str(dict_lines["REF"][1]) + str(dict_lines["REF"][2])
-        ALT = str(dict_lines["ALT"][0]) + str(dict_lines["ALT"][1]) + str(dict_lines["ALT"][2])
-    ## TODO Check how much differences we found among DPs in the three positions of a codon.
-    REF_DP = dict_lines["REF_DP"][0]
-    REF_RV = dict_lines["REF_RV"][0]
-    ALT_DP = dict_lines["ALT_DP"][0]
-    ALT_RV = dict_lines["ALT_RV"][0]
-    QUAL = dict_lines["QUAL"][0]
-    REF_CODON = REF
-    ALT_CODON = ALT
-    FILTER =dict_lines["FILTER"][0]
-    # INFO DP depends on the decision in the todo above. SB is left with the first one.
-    INFO = dict_lines["INFO"][0]
-    FORMAT = dict_lines["FORMAT"][0]
-    # sample depends on the decision in the todo above.
-    SAMPLE = dict_lines["SAMPLE"][0]
-    return CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,SAMPLE
+        Vars fixed: chrom, pos, id, ref, alt, qual, filter, info, format
+    """
+    # Collapsed variant parameters equal to first variant
+    key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"]
+    chrom, pos, id, qual, filter, info, format = [
+        variants[next(iter(variants))][key] for key in key_list
+    ]
+    # If no consecutive, process one variant line
+    # If two consecutive, process two variant lines into one
+    # If three consecutive, process three variant lines and write one
+    ref = ""
+    alt = ""
+    iter_variants = iter(variants)
+    for i in range(num_collapse):
+        var = next(iter_variants)
+        ref += variants[var]["ref"]
+        alt += variants[var]["alt"]

-def make_dir(path):
-    '''
-    Description:
-        Create directory if it doesn't exist.
-    Input:
-        path - path where the directory will be created.
-    Returns:
-        None
-    '''
-    if not len(path) == 0:
-        try:
-            os.makedirs(path)
-        except OSError as exception:
-            if exception.errno != errno.EEXIST:
-                raise
+    return chrom, pos, id, ref, alt, qual, filter, info, format


-def ivar_variants_to_vcf(file_in, file_out, pass_only=False, min_allele_frequency=0, ignore_strand_bias=False, ignore_merge_codons=False):
-    '''
-    Description:
-        Main function to convert iVar variants TSV to VCF.
- Input: - file_in : iVar variants TSV file - file_out : VCF output file - pass_only : Only keep variants that PASS filter [True, False] - min_allele_freq : Minimum allele frequency to keep a variant [0] - ignore_strand_bias : Do not apply strand-bias filter [True, False] - ignore_merge_codons : Do not take into account consecutive positions belong to the same codon. - Returns: - None - ''' - ## Create output directory - filename = os.path.splitext(file_in)[0] - out_dir = os.path.dirname(file_out) +def main(args=None): + # Process args + args = parse_args(args) + + # Initialize vars + filename = os.path.splitext(args.file_in)[0] + out_dir = os.path.dirname(args.file_out) + var_list = [] # store variants + var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} # variant counts + variants = OrderedDict() # variant dict (merge codon) + q_pos = deque([], maxlen=3) # pos fifo queue (merge codon) + + # Create output directory make_dir(out_dir) - ## Define VCF header - header_source = [ - "##fileformat=VCFv4.2", - "##source=iVar" - ] - header_info = [ - '##INFO=' - ] - header_filter = [ - '##FILTER=', - '##FILTER= 0.05">' - ] - header_format = [ - '##FORMAT=', - '##FORMAT=', - '##FORMAT=', - '##FORMAT=', - '##FORMAT=', - '##FORMAT=', - '##FORMAT=', - '##FORMAT=', - ] - header_cols = [ - f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}" - ] - if not ignore_strand_bias: - header_info += [ - '##INFO=' - ] - header_filter += [ - '##FILTER=' - ] - header = header_source + header_info + header_filter + header_format + header_cols + ############################## + ## Write vcf header to file ## + ############################## + write_vcf_header(args.fasta, args.ignore_strand_bias, args.file_out, filename) - ## Initialise variables - var_list = [] - var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} - dict_lines = {'CHROM':[], 'POS':[], 'ID':[], 'REF':[], 'ALT':[], 'REF_DP':[], 'REF_RV':[], 'ALT_DP':[], 'ALT_RV':[], 'QUAL':[], 'REF_CODON':[], 'ALT_CODON':[], 'FILTER': [], 'INFO':[], 'FORMAT':[], 'SAMPLE':[]} - write_line = False - fout = open(file_out, "w") - fout.write('\n'.join(header) + '\n') - with open(file_in, 'r') as fin: + ################################# + ## Read and process input file ## + ################################# + with open(args.file_in, "r") as fin: for line in fin: if not re.match("REGION", line): - line = re.split("\t", line) - - ## Assign intial fields to variables - CHROM = line[0] - POS = line[1] - ID = "." - REF = line[2] - ALT = line[3] - - ## REF/ALF depths - REF_DP = int(line[4]) - REF_RV = int(line[5]) - REF_FW = REF_DP - REF_RV - ALT_RV = int(line[8]) - ALT_DP = int(line[7]) - ALT_FW = ALT_DP - ALT_RV - - ## Perform a fisher_exact test for strand bias detection - table = np.array([[REF_FW, REF_RV], [ALT_FW, ALT_RV]]) - oddsr, pvalue = fisher_exact(table, alternative='greater') - - ## Determine variant type - var_type = "SNP" - if ALT[0] == "+": - ALT = REF + ALT[1:] - var_type = "INS" - elif ALT[0] == "-": - REF += ALT[1:] - ALT = line[2] - var_type = "DEL" - - QUAL = "." 
- - ## Determine FILTER field - INFO = f"DP={line[11]}" - pass_test = line[13] - if ignore_strand_bias: - if pass_test == "TRUE": - FILTER = "PASS" - else: - FILTER = "ft" - else: - ## Add SB in the FILTER field if strand-bias p-value is significant - if pvalue < 0.05 and pass_test == "TRUE": - FILTER = "sb" - elif pvalue > 0.05 and pass_test == "TRUE": - FILTER = "PASS" - elif pvalue <= 0.05 and pass_test == "FALSE": - FILTER = "ft;sb" - else: - FILTER = "ft" - INFO += f":SB_PV={str(round(pvalue, 5))}" - - FORMAT = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ" - SAMPLE = f'1:{":".join(line[4:11])}' - - REF_CODON = line[15] - ALT_CODON = line[17] - param_list = [CHROM, POS, ID, REF, ALT, REF_DP, REF_RV, ALT_DP, ALT_RV, QUAL, REF_CODON, ALT_CODON, FILTER, INFO, FORMAT, SAMPLE] - - if ignore_merge_codons or var_type != "SNP": - write_line = True - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - - else: - ## dict_lines contains all the informative fields for 3 positions in the vcf. - # dict_lines has a maximum size of three. - - ## Always fill dict_lines until size 2. - if len(dict_lines["POS"]) == 0 or len(dict_lines["POS"]) == 1: - for i,j in enumerate(dict_lines): - dict_lines.setdefault(j, []).append(param_list[i]) - write_line=False - - # If queue has size 2, we include the third line - elif len(dict_lines["POS"]) == 2: - for i,j in enumerate(dict_lines): - dict_lines.setdefault(j, []).append(param_list[i]) - # Are two positions in the dict consecutive? - if check_consecutive(dict_lines["POS"]) == 2: - ## If the first position is not on the third position of the codon they are in the same codon. - if codon_position(dict_lines["REF_CODON"][0],dict_lines["ALT_CODON"][0]) != 2: - write_line = True - num_collapse = "2" - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - ## We removed the first two items in dict_lines with have been just processed. - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - dict_lines[list(dict_lines.keys())[i]].pop(0) - else: - write_line = True - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - - # Are the three positions in the dict consecutive? - elif check_consecutive(dict_lines["POS"]) == 3: - ## we check the first position in which codon position is to process it acordingly. - # If first position is in the first codon position all three positions belong to the same codon. 
- if codon_position(dict_lines["REF_CODON"][0], dict_lines["ALT_CODON"][0]) == 0: - write_line = True - num_collapse = 3 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - dict_lines[list(dict_lines.keys())[i]].pop(0) - # we empty the dict_lines - dict_lines = {'CHROM':[], 'POS':[], 'ID':[], 'REF':[], 'ALT':[], 'REF_DP':[], 'REF_RV':[], 'ALT_DP':[], 'ALT_RV':[], 'QUAL':[], 'REF_CODON':[], 'ALT_CODON':[], 'FILTER':[], 'INFO':[], 'FORMAT':[], 'SAMPLE':[]} - # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. - elif codon_position(dict_lines["REF_CODON"][0], dict_lines["ALT_CODON"][0]) == 1: - write_line = True - num_collapse = 2 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - dict_lines[list(dict_lines.keys())[i]].pop(0) - ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. - elif codon_position(dict_lines["REF_CODON"][0], dict_lines["ALT_CODON"][0]) == 2: - write_line = True - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - - elif check_consecutive(dict_lines["POS"]) == False: - write_line = True - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - else: - print("Something went terribly wrong!!" 
+ str(len(dict_lines["POS"]))) - ## Determine whether to output variant - if pass_only and FILTER != "PASS": + ################ + ## Parse line ## + ################ + ## format= + # [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ] + write_line = True + ( + chrom, + pos, + id, + ref, + alt, + qual, + info, + format, + ref_codon, + alt_codon, + pass_test, + var_type, + ) = parse_ivar_line(line) + ##################### + ## Process filters ## + ##################### + ## ivar fisher test + filter = "" + if ivar_filter(pass_test): + filter = ivar_filter(pass_test) + ## strand-bias fisher test + if not args.ignore_strand_bias: + if strand_bias_filter(format): + if filter: + filter += ";" + strand_bias_filter(format) + else: + filter = strand_bias_filter(format) + + if not filter: + filter = "PASS" + + ##################### + ## Filter variants ## + ##################### + if args.pass_only and filter != "PASS": write_line = False - if float(line[10]) < min_allele_frequency: + ### AF filtering. ALT_DP/(ALT_DP+REF_DP) + if ( + float(format[3] / (format[0] + format[3])) + < args.allele_freq_threshold + ): write_line = False - if (CHROM, POS, REF, ALT) in var_list: + ### Duplication filter + if (chrom, pos, ref, alt) in var_list: write_line = False else: - var_list.append((CHROM, POS, REF, ALT)) + var_list.append((chrom, pos, ref, alt)) + + ############################################################ + ## MERGE_CODONS ## + ## Merge consecutive variants belonging to the same codon ## + ############################################################ + if not args.ignore_merge_codons and var_type == "SNP": + ## re-fill queue and dict accordingly + q_pos.append(pos) + variants[(chrom, pos, ref, alt)] = { + "chrom": chrom, + "pos": pos, + "id": id, + "ref": ref, + "alt": alt, + "qual": qual, + "filter": filter, + "info": info, + "format": format, + "ref_codon": ref_codon, + "alt_codon": alt_codon, + } + + if len(q_pos) == q_pos.maxlen: + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] + num_collapse = check_merge_codons( + q_pos, fe_codon_ref, fe_codon_alt + ) + ( + chrom, + pos, + id, + ref, + alt, + qual, + filter, + info, + format, + ) = process_variants(variants, num_collapse) + + ## Empty variants dict and queue accordingly + for i in range(num_collapse): + variants.popitem(last=False) + q_pos.popleft() + else: + write_line = False - ## Write to file + ############################## + ## Write output to vcf file ## + ############################## if write_line: var_count_dict[var_type] += 1 - fout.write(oline) - - ## Print variant counts to pass to MultiQC + write_vcf_line( + chrom, + pos, + id, + ref, + alt, + filter, + qual, + info, + format, + args.file_out, + ) + + if not args.ignore_merge_codons: + ####################### + ## handle last lines ## + ####################### + while len(q_pos) > 0: + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + (chrom, pos, id, ref, alt, qual, filter, info, format) = process_variants( + variants, num_collapse + ) + + var_count_dict[var_type] += 1 + write_vcf_line( + chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out + ) + ## Empty variants dict and queue accordingly + for i in range(num_collapse): + variants.popitem(last=False) + q_pos.popleft() + + ############################################# + ## variant counts to 
pass to MultiQC ## + ############################################# var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] print("\t".join(["sample"] + [x[0] for x in var_count_list])) print("\t".join([filename] + [x[1] for x in var_count_list])) - ## Handle last 3 lines. - if len(dict_lines["POS"]) == 2: - if check_consecutive(dict_lines["POS"]) == 2: - if codon_position(dict_lines["REF_CODON"][0],dict_lines["ALT_CODON"][0]) != 2: - write_line = True - num_collapse = 2 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - fout.write(oline) - else: - oline = (dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - oline1 = (dict_lines["CHROM"][1] + "\t" + dict_lines["POS"][1] + "\t" + dict_lines["ID"][1] + "\t" + dict_lines["REF"][1] + "\t" + dict_lines["ALT"][1] + "\t" + dict_lines["QUAL"][1] + "\t" + dict_lines["FILTER"][1] + "\t" + dict_lines["INFO"][1] + "\t" + dict_lines["FORMAT"][1] + "\t" + dict_lines["SAMPLE"][1] + "\n") - fout.write(oline) - fout.write(oline1) - elif len(dict_lines["POS"]) == 1: - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - fout.write(oline) - fout.close() - - -def main(args=None): - args = parse_args(args) - ivar_variants_to_vcf( - args.file_in, - args.file_out, - args.pass_only, - args.allele_freq_threshold, - args.ignore_strand_bias, - args.ignore_merge_codons, - ) - if __name__ == "__main__": sys.exit(main()) diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py index dac8b7ae..90a4c21a 100755 --- a/bin/multiqc_to_custom_csv.py +++ b/bin/multiqc_to_custom_csv.py @@ -239,7 +239,7 @@ def main(args=None): "multiqc_pangolin.yaml", [("Pangolin lineage", ["lineage"])], ), - ("multiqc_nextclade_clade.yaml", [("Nextclade clade", ["clade"])]), + ("multiqc_nextclade_clade-plot.yaml", [("Nextclade clade", ["clade"])]), ] illumina_assembly_files = [ @@ -308,7 +308,7 @@ def main(args=None): ("multiqc_snpeff.yaml", [("# Missense variants", ["MISSENSE"])]), ("multiqc_quast.yaml", [("# Ns per 100kb consensus", ["# N's per 100 kbp"])]), ("multiqc_pangolin.yaml", [("Pangolin lineage", ["lineage"])]), - ("multiqc_nextclade_clade.yaml", [("Nextclade clade", ["clade"])]), + ("multiqc_nextclade_clade-plot.yaml", [("Nextclade clade", ["clade"])]), ] if args.PLATFORM == "illumina": diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 0edea4dd..5cd076ae 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -122,7 +122,7 @@ if (!params.skip_kraken2) { publishDir = [ path: { "${params.outdir}/kraken2" }, mode: params.publish_dir_mode, - pattern: "*.txt" + pattern: "*report.txt" ] } } @@ -146,7 +146,7 @@ if (!params.skip_variants) { withName: 'BOWTIE2_ALIGN' { ext.args = '--local --very-sensitive-local --seed 1' - ext.args2 = '-F4' + ext.args2 = '-F4 -bhS' publishDir = [ [ 
path: { "${params.outdir}/variants/bowtie2/log" }, @@ -180,6 +180,7 @@ if (!params.skip_variants) { } withName: '.*:.*:ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: params.publish_dir_mode, @@ -244,6 +245,7 @@ if (!params.skip_variants) { } withName: '.*:.*:PRIMER_TRIM_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.ivar_trim.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: params.publish_dir_mode, @@ -257,7 +259,7 @@ if (!params.skip_variants) { process { withName: 'PICARD_MARKDUPLICATES' { ext.args = [ - 'ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp', + '--ASSUME_SORTED true --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp', params.filter_duplicates ? 'REMOVE_DUPLICATES=true' : '' ].join(' ').trim() ext.prefix = { "${meta.id}.markduplicates.sorted" } @@ -276,7 +278,6 @@ if (!params.skip_variants) { } withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.markduplicates.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: params.publish_dir_mode, @@ -285,6 +286,7 @@ if (!params.skip_variants) { } withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.markduplicates.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: params.publish_dir_mode, @@ -297,7 +299,7 @@ if (!params.skip_variants) { if (!params.skip_picard_metrics) { process { withName: 'PICARD_COLLECTMULTIPLEMETRICS' { - ext.args = 'VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp' + ext.args = '--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' publishDir = [ [ path: { "${params.outdir}/variants/bowtie2/picard_metrics" }, @@ -317,7 +319,7 @@ if (!params.skip_variants) { if (!params.skip_mosdepth) { process { withName: 'MOSDEPTH_GENOME' { - ext.args = '--fast-mode' + ext.args = '--fast-mode --by 200' publishDir = [ path: { "${params.outdir}/variants/bowtie2/mosdepth/genome" }, mode: params.publish_dir_mode, @@ -388,7 +390,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_BGZIP' { + withName: '.*:.*:VARIANTS_IVAR:BCFTOOLS_SORT' { publishDir = [ path: { "${params.outdir}/variants/ivar" }, mode: params.publish_dir_mode, @@ -396,7 +398,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:VARIANTS_IVAR:.*:.*:TABIX_TABIX' { + withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/ivar" }, @@ -405,7 +407,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:VARIANTS_IVAR:.*:.*:BCFTOOLS_STATS' { + withName: '.*:.*:VARIANTS_IVAR:.*:BCFTOOLS_STATS' { publishDir = [ path: { "${params.outdir}/variants/ivar/bcftools_stats" }, mode: params.publish_dir_mode, @@ -665,7 +667,7 @@ if (!params.skip_variants) { publishDir = [ path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/nextclade" }, mode: params.publish_dir_mode, - pattern: "*.csv" + saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } ] } @@ -1048,7 +1050,10 @@ if (!params.skip_assembly) { if (!params.skip_multiqc) { process { withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = [ + '-k yaml', + params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' + ].join(' ').trim() publishDir = [ [ path: { "${params.outdir}/multiqc" }, diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 4168ac7c..98d06e91 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -91,7 +91,6 @@ process { } withName: '.*:.*:.*:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}" }, mode: params.publish_dir_mode, @@ -100,7 +99,7 @@ process { } withName: '.*:.*:.*:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.mapped.sorted" } + ext.prefix = { "${meta.id}.mapped.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/samtools_stats" }, mode: params.publish_dir_mode, @@ -168,7 +167,7 @@ if (!params.skip_mosdepth) { } withName: 'MOSDEPTH_GENOME' { - ext.args = '--fast-mode' + ext.args = '--fast-mode --by 200' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/mosdepth/genome" }, mode: params.publish_dir_mode, @@ -241,7 +240,7 @@ if (!params.skip_nextclade) { publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/nextclade" }, mode: params.publish_dir_mode, - pattern: "*.csv" + saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } ] } @@ -362,7 +361,10 @@ if (!params.skip_asciigenome) { if (!params.skip_multiqc) { process { withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = [ + '-k yaml', + params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" }, mode: params.publish_dir_mode, diff --git a/docs/README.md b/docs/README.md index 0e457111..ceaf7450 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,9 +2,9 @@ The nf-core/viralrecon documentation is split into the following pages: -* [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. -* [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/output.md b/docs/output.md index 8c8e8d8b..610e8a15 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,4 +1,3 @@ - # Introduction This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. 
@@ -7,24 +6,24 @@ The directories listed below will be created in the results directory after the # Nanopore: Pipeline overview -* [Preprocessing](#nanopore-preprocessing) - * [pycoQC](#nanopore-pycoqc) - Sequencing QC - * [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy - * [NanoPlot](#nanopore-nanoplot) - Read QC -* [Variant calling](#nanopore-variant-calling) - * [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence -* [Downstream analysis](#nanopore-downstream-analysis) - * [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics - * [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots - * [BCFTools](#nanopore-bcftools) - Variant count metrics - * [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * [QUAST](#nanopore-quast) - Consensus assessment report - * [Pangolin](#nanopore-pangolin) - Lineage analysis - * [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks - * [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks - * [Variants long table](#nanopore-variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis -* [Workflow reporting](#nanopore-workflow-reporting) - * [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results +- [Preprocessing](#nanopore-preprocessing) + - [pycoQC](#nanopore-pycoqc) - Sequencing QC + - [artic guppyplex](#nanopore-artic-guppyplex) - Aggregate pre-demultiplexed reads from MinKNOW/Guppy + - [NanoPlot](#nanopore-nanoplot) - Read QC +- [Variant calling](#nanopore-variant-calling) + - [artic minion](#nanopore-artic-minion) - Align reads, call variants and generate consensus sequence +- [Downstream analysis](#nanopore-downstream-analysis) + - [SAMtools](#nanopore-samtools) - Remove unmapped reads and obtain alignment metrics + - [mosdepth](#nanopore-mosdepth) - Genome-wide and amplicon coverage QC plots + - [BCFTools](#nanopore-bcftools) - Variant count metrics + - [SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + - [QUAST](#nanopore-quast) - Consensus assessment report + - [Pangolin](#nanopore-pangolin) - Lineage analysis + - [Nextclade](#nanopore-nextclade) - Clade assignment, mutation calling and sequence quality checks + - [ASCIIGenome](#nanopore-asciigenome) - Individual variant screenshots with annotation tracks + - [Variants long table](#nanopore-variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis +- [Workflow reporting](#nanopore-workflow-reporting) + - [MultiQC](#nanopore-multiqc) - Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results ## Nanopore: Preprocessing @@ -35,8 +34,8 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read
Output files -* `pycoqc/` - * `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. +- `pycoqc/` + - `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode.
@@ -49,8 +48,8 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read
Output files -* `guppyplex/` - * `*.fastq.gz` files generated by aggregate pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be via a custom config file such as the one below. +- `guppyplex/` + - `*.fastq.gz` files generated by aggregating pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be saved via a custom config file such as the one below. ```nextflow params { @@ -71,8 +70,8 @@ The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool fro
Output files -* `nanoplot//` - * Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. +- `nanoplot//` + - Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots.
@@ -87,21 +86,21 @@ The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool fro
Output files -* `/` - * `*.consensus.fasta`: Consensus fasta file generated by artic minion. - * `*.pass.unique.vcf.gz`: VCF file containing unique variants passing quality filters. - * `*.pass.unique.vcf.gz.tbi`: VCF index file containing unique variants passing quality filters. - * `*.pass.vcf.gz`: VCF file containing variants passing quality filters. - * `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. - * `*.primers.vcf`: VCF file containing variants found in primer-binding regions. - * `*.merged.vcf`: VCF file containing all detected variants. - * `*.fail.vcf`: VCF file containing variants failing quality filters. - * `*.sorted.bam`: BAM file generated by initial alignment. - * `*.sorted.bam.bai`: BAM index file generated by initial alignment. - * `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. - * `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. - * `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. +- `/` + - `*.consensus.fasta`: Consensus fasta file generated by artic minion. + - `*.pass.unique.vcf.gz`: VCF file containing unique variants passing quality filters. + - `*.pass.unique.vcf.gz.tbi`: VCF index file containing unique variants passing quality filters. + - `*.pass.vcf.gz`: VCF file containing variants passing quality filters. + - `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. + - `*.primers.vcf`: VCF file containing variants found in primer-binding regions. + - `*.merged.vcf`: VCF file containing all detected variants. + - `*.fail.vcf`: VCF file containing variants failing quality filters. + - `*.sorted.bam`: BAM file generated by initial alignment. + - `*.sorted.bam.bai`: BAM index file generated by initial alignment. + - `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. + - `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. + - `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. + - `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -116,11 +115,11 @@ The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from t
Output files -* `/` - * `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `/samtools_stats/` - * SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. +- `/` + - `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + - `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. +- `/samtools_stats/` + - SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -135,17 +134,17 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur
Output files -* `/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. -* `/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +- `/mosdepth/genome/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + - `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + - `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +- `/mosdepth/amplicon/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + - `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + - `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + - `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -164,8 +163,8 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur
Output files -* `/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. +- `/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -180,15 +179,15 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur
Output files -* `/snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. -* `/snpeff/bcftools_stats/` - * `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. +- `/snpeff/` + - `*.snpeff.csv`: Variant annotation csv file. + - `*.snpeff.genes.txt`: Gene table for annotated variants. + - `*.snpeff.summary.html`: Summary html file for variants. + - `*.snpeff.vcf.gz`: VCF file with variant annotations. + - `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + - `*.snpsift.txt`: SnpSift summary table. +- `/snpeff/bcftools_stats/` + - `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -205,8 +204,8 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur
Output files -* `/quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. +- `/quast/` + - `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -219,8 +218,8 @@ BAM files containing the original alignments from either Minimap2 or BWA are fur
Output files -* `/pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. +- `/pangolin/` + - `*.pangolin.csv`: Lineage analysis results from Pangolin. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -233,8 +232,8 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi
Output files -* `/nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. +- `/nextclade/` + - `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -247,8 +246,8 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi
Output files -* `/asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. +- `/asciigenome//` + - `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -263,8 +262,8 @@ As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs
Output files -* `/` - * `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis. +- `/` + - `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis. **NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). @@ -289,10 +288,10 @@ SAMPLE1_PE,MN908947.3,11719,G,A,PASS,195,9,186,0.95,orf1ab,synonymous_variant,c.
Output files -* `multiqc//` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignmnet and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. +- `multiqc//` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report.
@@ -308,36 +307,36 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t # Illumina: Pipeline overview -* [Preprocessing](#illumina-preprocessing) - * [cat](#cat) - Merge re-sequenced FastQ files - * [FastQC](#fastqc) - Raw read QC - * [fastp](#fastp) - Adapter and quality trimming - * [Kraken 2](#kraken-2) - Removal/QC for host reads -* [Variant calling](#illumina-variant-calling) - * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome - * [SAMtools](#samtools) - Sort, index and generate metrics for alignments - * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data - * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal - * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics - * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - * [iVar variants](#ivar-variants) *||* [BCFTools call](#bcftools-call) - Variant calling - * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks - * [iVar consensus](#ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Consensus sequence generation - * [QUAST](#quast) - Consensus assessment report - * [Pangolin](#pangolin) - Lineage analysis - * [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks - * [Variants long table](#variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis -* [De novo assembly](#illumina-de-novo-assembly) - * [Cutadapt](#cutadapt) - Primer trimming for amplicon data - * [SPAdes](#spades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly - * [BLAST](#blast) - Blast to reference assembly - * [ABACAS](#abacas) - Order contigs according to reference genome - * [PlasmidID](#plasmidid) - Assembly report and visualisation - * [Assembly QUAST](#assembly-quast) - Assembly quality assessment -* [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) - * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling - * [Reference genome files](#reference-genome-files) - Save reference genome indices/files +- [Preprocessing](#illumina-preprocessing) + - [cat](#cat) - Merge re-sequenced FastQ files + - [FastQC](#fastqc) - Raw read QC + - [fastp](#fastp) - Adapter and quality trimming + - [Kraken 2](#kraken-2) - Removal/QC for host reads +- [Variant calling](#illumina-variant-calling) + - [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome + - [SAMtools](#samtools) - Sort, index and generate metrics for alignments + - [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data + - [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal + - [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Alignment metrics + - [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics + - [iVar variants](#ivar-variants) _||_ [BCFTools call](#bcftools-call) - Variant calling + - [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + - [ASCIIGenome](#asciigenome) - Individual variant screenshots with annotation tracks + - [iVar consensus](#ivar-consensus) _||_ [BCFTools and BEDTools](#bcftools-and-bedtools) - Consensus sequence generation + - [QUAST](#quast) - Consensus 
assessment report + - [Pangolin](#pangolin) - Lineage analysis + - [Nextclade](#nextclade) - Clade assignment, mutation calling and sequence quality checks + - [Variants long table](#variants-long-table) - Collate per-sample information for individual variants, functional effect prediction and lineage analysis +- [De novo assembly](#illumina-de-novo-assembly) + - [Cutadapt](#cutadapt) - Primer trimming for amplicon data + - [SPAdes](#spades) _||_ [Unicycler](#unicycler) _||_ [minia](#minia) - Viral genome assembly + - [BLAST](#blast) - Blast to reference assembly + - [ABACAS](#abacas) - Order contigs according to reference genome + - [PlasmidID](#plasmidid) - Assembly report and visualisation + - [Assembly QUAST](#assembly-quast) - Assembly quality assessment +- [Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) + - [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling + - [Reference genome files](#reference-genome-files) - Save reference genome indices/files ## Illumina: Preprocessing @@ -346,8 +345,8 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t
Output files -* `fastq/` - * `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. +- `fastq/` + - `*.merged.fastq.gz`: These files are not saved by default but can be saved via a custom config file such as the one below. ```nextflow params { @@ -368,9 +367,9 @@ If multiple libraries/runs have been provided for the same sample in the input s
Output files -* `fastqc/raw/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `fastqc/raw/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. **NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. @@ -385,14 +384,14 @@ If multiple libraries/runs have been provided for the same sample in the input s
Output files -* `fastp/` - * `*.fastp.html`: Trimming report in html format. - * `*.fastp.json`: Trimming report in json format. -* `fastp/log/` - * `*.fastp.log`: Trimming log file. -* `fastqc/trim/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `fastp/` + - `*.fastp.html`: Trimming report in html format. + - `*.fastp.json`: Trimming report in json format. +- `fastp/log/` + - `*.fastp.log`: Trimming log file. +- `fastqc/trim/` + - `*_fastqc.html`: FastQC report of the trimmed reads. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
@@ -405,14 +404,14 @@ If multiple libraries/runs have been provided for the same sample in the input s
Output files -* `kraken2/` - * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. +- `kraken2/` + - `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format.
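As a rough illustration of how the host-read estimate mentioned below can be pulled from such a report, here is a minimal Python sketch. It assumes the standard six-column, tab-separated Kraken 2 report layout (percentage, clade read count, direct read count, rank code, NCBI taxid, name); the file name and host taxon are hypothetical examples.

```python
def host_read_percentage(report_path, host_name="Homo sapiens"):
    """Return the clade-level percentage of reads assigned to the host taxon.

    Assumes the standard Kraken 2 report layout: percentage, clade reads,
    direct reads, rank code, taxid, name (the name column is indented).
    """
    with open(report_path) as handle:
        for line in handle:
            fields = line.rstrip("\n").split("\t")
            if len(fields) >= 6 and fields[5].strip() == host_name:
                # Column 1 is the percentage of reads in this clade.
                return float(fields[0])
    return 0.0

# Hypothetical file name for illustration.
print(host_read_percentage("SAMPLE1_PE.kraken2.report.txt"))
```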
[Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. -We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the *de novo* assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads, however, the filtering can be amended via the `--kraken2_variants_host_filter` parameter. +We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the _de novo_ assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads; however, the filtering can be enabled via the `--kraken2_variants_host_filter` parameter. ![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) @@ -425,8 +424,8 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read
Output files -* `variants/bowtie2/log/` - * `*.bowtie2.log`: Bowtie 2 mapping log file. +- `variants/bowtie2/log/` + - `*.bowtie2.log`: Bowtie 2 mapping log file.
@@ -439,11 +438,11 @@ A file called `summary_variants_metrics_mqc.csv` containing a selection of read
Output files -* `variants/bowtie2/` - * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. -* `variants/bowtie2/samtools_stats/` - * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. +- `variants/bowtie2/` + - `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + - `.sorted.bam.bai`: Index file for coordinate sorted BAM file. +- `variants/bowtie2/samtools_stats/` + - SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files.
@@ -456,13 +455,13 @@ Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourcef
Output files -* `variants/bowtie2/` - * `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. - * `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. -* `variants/bowtie2/samtools_stats/` - * SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. -* `variants/bowtie2/log/` - * `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. +- `variants/bowtie2/` + - `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. + - `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. +- `variants/bowtie2/samtools_stats/` + - SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. +- `variants/bowtie2/log/` + - `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout.
@@ -473,17 +472,17 @@ If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pa
Output files -* `variants/bowtie2/` - * `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. - * `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. -* `variants/bowtie2/samtools_stats/` - * SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. -* `variants/bowtie2/picard_metrics/` - * `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. +- `variants/bowtie2/` + - `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. + - `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. +- `variants/bowtie2/samtools_stats/` + - SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. +- `variants/bowtie2/picard_metrics/` + - `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates.
-Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because you anticipate high levels of duplication with viral data due to the size of the genome, however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only *mark* the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. +Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because high levels of duplication are anticipated with viral data due to the size of the genome; however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only _mark_ the duplicate reads identified amongst the alignments to allow you to gauge the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. ![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) @@ -492,10 +491,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
Output files -* `variants/bowtie2/picard_metrics/` - * `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. -* `variants/bowtie2/picard_metrics/pdf/` - * `*.pdf` plots for metrics obtained from CollectMultipleMetrics. +- `variants/bowtie2/picard_metrics/` + - `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. +- `variants/bowtie2/picard_metrics/pdf/` + - `*.pdf` plots for metrics obtained from CollectMultipleMetrics.
@@ -508,17 +507,17 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
Output files -* `variants/bowtie2/mosdepth/genome/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. - * `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. -* `variants/bowtie2/mosdepth/amplicon/` - * `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - * `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +- `variants/bowtie2/mosdepth/genome/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + - `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + - `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +- `variants/bowtie2/mosdepth/amplicon/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + - `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + - `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + - `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values.
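A minimal sketch of reading the mean coverage from one of the `*.mosdepth.summary.txt` files listed above; it assumes the standard mosdepth summary layout (a tab-separated header row `chrom length bases mean min max` plus a final `total` row), and the file name is a hypothetical example.

```python
import csv

def mosdepth_mean_coverage(summary_path):
    """Return the genome-wide mean coverage from a mosdepth summary file.

    Assumes the standard mosdepth layout with a header row
    (chrom, length, bases, mean, min, max) and a final 'total' row;
    region runs may add extra '*_region' rows, which are ignored here.
    """
    with open(summary_path) as handle:
        for row in csv.DictReader(handle, delimiter="\t"):
            if row["chrom"] == "total":
                return float(row["mean"])
    raise ValueError(f"No 'total' row found in {summary_path}")

# Hypothetical file name for illustration.
print(mosdepth_mean_coverage("SAMPLE1_PE.mosdepth.summary.txt"))
```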
@@ -535,14 +534,14 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
Output files -* `variants/ivar/` - * `*.tsv`: Original iVar variants in TSV format. - * `*.vcf.gz`: iVar variants in VCF format. Converted using custom `ivar_variants_to_vcf.py` python script. - * `*.vcf.gz.tbi`: iVar variants VCF index file. -* `variants/ivar/log/` - * `*.variant_counts.log`: Counts for type of variants called by iVar. -* `variants/ivar/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. +- `variants/ivar/` + - `*.tsv`: Original iVar variants in TSV format. + - `*.vcf.gz`: iVar variants in VCF format. Converted using custom `ivar_variants_to_vcf.py` python script. + - `*.vcf.gz.tbi`: iVar variants VCF index file. +- `variants/ivar/log/` + - `*.variant_counts.log`: Counts for type of variants called by iVar. +- `variants/ivar/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file.
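The conversion script's filtering logic is shown in the `ivar_variants_to_vcf.py` hunk earlier in this diff; the condensed sketch below mirrors its allele-frequency, duplicate and strand-bias checks. The 0.25 threshold is a hypothetical example value, and the exact 2x2 table for the strand-bias Fisher test (built from the REF_DP/REF_RV and ALT_DP/ALT_RV fields, assuming SciPy's `fisher_exact`) is an assumption based on the `SB_PV` p-value computed by the pre-refactor code.

```python
from scipy.stats import fisher_exact

# FORMAT ordering used by the script:
# [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ]

def strand_bias_pvalue(fmt):
    """Fisher's exact test on forward/reverse read support (sketch).

    Assumes REF_RV/ALT_RV count reverse-strand reads, so the forward
    counts are DP - RV; modelled on the old script's SB_PV p-value.
    """
    table = [
        [fmt[0] - fmt[1], fmt[1]],  # REF: forward, reverse
        [fmt[3] - fmt[4], fmt[4]],  # ALT: forward, reverse
    ]
    _, pvalue = fisher_exact(table)
    return pvalue

def keep_variant(fmt, filter_field, seen, key, pass_only=False, af_threshold=0.25):
    """Condensed sketch of the per-variant checks during TSV -> VCF conversion."""
    # Drop non-PASS records when --pass_only is set.
    if pass_only and filter_field != "PASS":
        return False
    # Allele-frequency filter: ALT_DP / (ALT_DP + REF_DP).
    if fmt[3] / (fmt[0] + fmt[3]) < af_threshold:
        return False
    # Duplicate filter keyed on (CHROM, POS, REF, ALT).
    if key in seen:
        return False
    seen.add(key)
    return True

# Values echo the example row in the variants long table documentation:
# REF_DP=9, ALT_DP=186, ALT_FREQ=0.95 at MN908947.3:11719 G>A.
fmt = [9, 4, 38, 186, 90, 38, 0.95]
seen = set()
print(keep_variant(fmt, "PASS", seen, ("MN908947.3", 11719, "G", "A")))
print(round(strand_bias_pvalue(fmt), 5))
```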
@@ -557,15 +556,15 @@ iVar outputs a tsv format which is not compatible with downstream analysis such
Output files -* `variants/bcftools/` - * `*.vcf.gz`: Variants VCF file. - * `*.vcf.gz.tbi`: Variants VCF index file. -* `variants/bcftools/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. +- `variants/bcftools/` + - `*.vcf.gz`: Variants VCF file. + - `*.vcf.gz.tbi`: Variants VCF index file. +- `variants/bcftools/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file.
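A small sketch of pulling the headline counts out of a `*.bcftools_stats.txt` file; it assumes the usual `SN<TAB>id<TAB>key:<TAB>value` summary-number lines emitted by `bcftools stats`, and the file name is a hypothetical example.

```python
def bcftools_stats_counts(stats_path):
    """Collect the summary-number ('SN') counts from a bcftools stats file.

    Assumes each SN line has exactly four tab-separated fields:
    'SN', an id, a key ending in ':', and an integer value.
    """
    counts = {}
    with open(stats_path) as handle:
        for line in handle:
            if line.startswith("SN\t"):
                parts = line.rstrip("\n").split("\t")
                counts[parts[2].rstrip(":")] = int(parts[3])
    return counts

# Hypothetical file name for illustration.
counts = bcftools_stats_counts("SAMPLE1_PE.bcftools_stats.txt")
print(counts.get("number of SNPs"))
```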
-[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. It is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and *de novo* assembly steps of this pipeline to obtain basic statistics from the VCF output. +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. It is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and _de novo_ assembly steps of this pipeline to obtain basic statistics from the VCF output. ![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) @@ -574,15 +573,15 @@ iVar outputs a tsv format which is not compatible with downstream analysis such
Output files -* `variants//snpeff/` - * `*.snpeff.csv`: Variant annotation csv file. - * `*.snpeff.genes.txt`: Gene table for annotated variants. - * `*.snpeff.summary.html`: Summary html file for variants. - * `*.snpeff.vcf.gz`: VCF file with variant annotations. - * `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpsift.txt`: SnpSift summary table. -* `variants//snpeff/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. +- `variants//snpeff/` + - `*.snpeff.csv`: Variant annotation csv file. + - `*.snpeff.genes.txt`: Gene table for annotated variants. + - `*.snpeff.summary.html`: Summary html file for variants. + - `*.snpeff.vcf.gz`: VCF file with variant annotations. + - `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + - `*.snpsift.txt`: SnpSift summary table. +- `variants//snpeff/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -599,8 +598,8 @@ iVar outputs a tsv format which is not compatible with downstream analysis such
Output files -* `variants//asciigenome//` - * `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. +- `variants//asciigenome//` + - `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -615,15 +614,15 @@ As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs
Output files -* `variants//consensus/ivar/` - * `*.consensus.fa`: Consensus Fasta file generated by iVar. - * `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. -* `variants//consensus/ivar/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +- `variants//consensus/ivar/` + - `*.consensus.fa`: Consensus Fasta file generated by iVar. + - `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. +- `variants//consensus/ivar/base_qc/` + - `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + - `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + - `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + - `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + - `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). @@ -636,30 +635,30 @@ As described in the [iVar variants](#ivar-variants) section, iVar can be used in
Output files -* `variants//consensus/bcftools/` - * `*.consensus.fa`: Consensus fasta file generated by integrating the high allele-frequency variants called by iVar/BCFTools into the reference genome. - * `*.filtered.vcf.gz`: VCF file containing high allele-frequency variants (default: `>= 0.75`) that were integrated into the consensus sequence. - * `*.filtered.vcf.gz.tbi`: Variants VCF index file for high allele frequency variants. -* `variants//consensus/bcftools/base_qc/` - * `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. - * `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. - * `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. - * `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. - * `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. +- `variants//consensus/bcftools/` + - `*.consensus.fa`: Consensus fasta file generated by integrating the high allele-frequency variants called by iVar/BCFTools into the reference genome. + - `*.filtered.vcf.gz`: VCF file containing high allele-frequency variants (default: `>= 0.75`) that were integrated into the consensus sequence. + - `*.filtered.vcf.gz.tbi`: Variants VCF index file for high allele frequency variants. +- `variants//consensus/bcftools/base_qc/` + - `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + - `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + - `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + - `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + - `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic').
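As described in the paragraph below, low-coverage positions are masked before `bcftools consensus` projects the filtered variants onto the reference. A minimal sketch of collapsing per-base depths into BED mask intervals (the depth list and 10x cutoff are hypothetical; in the pipeline itself the mask is derived from `samtools mpileup` output and applied with `bedtools maskfasta`):

```python
def low_coverage_bed(depths, chrom, min_depth=10):
    """Collapse per-base depths into BED intervals below min_depth.

    depths is a list of per-position depths (0-based); the 10x cutoff is a
    hypothetical example. BED intervals are 0-based and half-open.
    """
    intervals, start = [], None
    for pos, depth in enumerate(depths):
        if depth < min_depth and start is None:
            start = pos
        elif depth >= min_depth and start is not None:
            intervals.append((chrom, start, pos))
            start = None
    if start is not None:
        intervals.append((chrom, start, len(depths)))
    return intervals

print(low_coverage_bed([0, 3, 25, 30, 2, 1, 40], "MN908947.3"))
# -> [('MN908947.3', 0, 2), ('MN908947.3', 4, 6)]
```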
-[BCFTools](http://samtools.github.io/bcftools/bcftools.html) is used in the variant calling and *de novo* assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also used be used to generate a consensus sequence by integrating variant calls into the reference genome. In this pipeline, we use `samtools mpileup` to create a mask using low coverage positions, and `bedtools maskfasta` to mask the genome sequences based on these intervals. Finally, `bcftools consensus` is used to generate the consensus by projecting the high allele frequency variants onto the masked genome reference sequence. +[BCFTools](http://samtools.github.io/bcftools/bcftools.html) is used in the variant calling and _de novo_ assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also be used to generate a consensus sequence by integrating variant calls into the reference genome. In this pipeline, we use `samtools mpileup` to create a mask using low coverage positions, and `bedtools maskfasta` to mask the genome sequences based on these intervals. Finally, `bcftools consensus` is used to generate the consensus by projecting the high allele frequency variants onto the masked genome reference sequence. ### QUAST
Output files -* `variants//consensus//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. +- `variants//consensus//quast/` + - `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). **NB:** The value of `` in the output directory name above is determined by the `--consensus_caller` parameter (Default: 'bcftools' for both '--protocol amplicon' and '--protocol metagenomic'). @@ -673,8 +672,8 @@ As described in the [iVar variants](#ivar-variants) section, iVar can be used in
Output files -* `variants//consensus//pangolin/` - * `*.pangolin.csv`: Lineage analysis results from Pangolin. +- `variants//consensus//pangolin/` + - `*.pangolin.csv`: Lineage analysis results from Pangolin. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). **NB:** The value of `` in the output directory name above is determined by the `--consensus_caller` parameter (Default: 'bcftools' for both '--protocol amplicon' and '--protocol metagenomic'). @@ -688,8 +687,8 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi
Output files -* `variants//consensus//nextclade/` - * `*.csv`: Analysis results from Nextlade containing genome clade assignment, mutation calling and sequence quality checks. +- `variants//consensus//nextclade/` + - `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. **NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). **NB:** The value of `` in the output directory name above is determined by the `--consensus_caller` parameter (Default: 'bcftools' for both '--protocol amplicon' and '--protocol metagenomic'). @@ -703,8 +702,8 @@ Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://gi
Output files -* `variants/<variant_caller>/` - * `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis. +- `variants/<variant_caller>/` + - `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis. **NB:** The value of `<variant_caller>` in the output directory name above is determined by the `--variant_caller` parameter. @@ -724,22 +723,22 @@ SAMPLE1_PE,MN908947.3,11719,G,A,PASS,195,9,186,0.95,orf1ab,synonymous_variant,c. ## Illumina: De novo assembly -A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and *de novo* assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. +A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and _de novo_ assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. ### Cutadapt
Output files -* `assembly/cutadapt/log/` - * `*.cutadapt.log`: Cutadapt log file generated from stdout. -* `assembly/cutadapt/fastqc/` - * `*_fastqc.html`: FastQC report of the trimmed reads. - * `*_fastqc.zip`: Zip archive containing the FastQC report. +- `assembly/cutadapt/log/` + - `*.cutadapt.log`: Cutadapt log file generated from stdout. +- `assembly/cutadapt/fastqc/` + - `*_fastqc.html`: FastQC report of the trimmed reads. + - `*_fastqc.zip`: Zip archive containing the FastQC report.
-In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. +In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the _de novo_ assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. ![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) @@ -748,13 +747,13 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `assembly/spades/<spades_mode>/` - * `*.scaffolds.fa.gz`: SPAdes scaffold assembly. - * `*.contigs.fa.gz`: SPAdes assembly contigs. - * `*.assembly.gfa.gz`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. -* `assembly/spades/<spades_mode>/bandage/` - * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. - * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. +- `assembly/spades/<spades_mode>/` + - `*.scaffolds.fa.gz`: SPAdes scaffold assembly. + - `*.contigs.fa.gz`: SPAdes assembly contigs. + - `*.assembly.gfa.gz`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. +- `assembly/spades/<spades_mode>/bandage/` + - `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. + - `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. **NB:** The value of `<spades_mode>` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). @@ -762,19 +761,19 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri [SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. -[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. +[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising _de novo_ assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing _de novo_ assemblies. ### Unicycler
Output files -* `assembly/unicycler/` - * `*.scaffolds.fa.gz`: Unicycler scaffold assembly. - * `*.assembly.gfa.gz`: Unicycler assembly graph in GFA format. -* `assembly/unicycler/bandage/` - * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. - * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. +- `assembly/unicycler/` + - `*.scaffolds.fa.gz`: Unicycler scaffold assembly. + - `*.assembly.gfa.gz`: Unicycler assembly graph in GFA format. +- `assembly/unicycler/bandage/` + - `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. + - `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format.
@@ -785,10 +784,10 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `assembly/minia/` - * `*.contigs.fa`: Minia scaffold assembly. - * `*.unitigs.fa`: Minia unitigs fasta file. - * `*.h5`: Minia h5 output file. +- `assembly/minia/` + - `*.contigs.fa`: Minia scaffold assembly. + - `*.unitigs.fa`: Minia unitigs fasta file. + - `*.h5`: Minia h5 output file.
@@ -799,9 +798,9 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `assembly/<assembler>/blastn/` - * `*.blastn.txt`: BLAST results against the target virus. - * `*.filter.blastn.txt`: Filtered BLAST results. +- `assembly/<assembler>/blastn/` + - `*.blastn.txt`: BLAST results against the target virus. + - `*.filter.blastn.txt`: Filtered BLAST results. **NB:** The value of `<assembler>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). @@ -814,16 +813,16 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `assembly/<assembler>/abacas/` - * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. - * `*.abacas.crunch`: Comparison file. - * `*.abacas.fasta`: Ordered and orientated sequence file. - * `*.abacas.gaps`: Gap information. - * `*.abacas.gaps.tab`: Gap information in tab-delimited format. - * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. - * `*.abacas.tab`: Feature file - * `*.unused_contigs.out`: Information on contigs that have a mapping information but could not be used in the ordering. -* `assembly/<assembler>/abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. +- `assembly/<assembler>/abacas/` + - `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. + - `*.abacas.crunch`: Comparison file. + - `*.abacas.fasta`: Ordered and orientated sequence file. + - `*.abacas.gaps`: Gap information. + - `*.abacas.gaps.tab`: Gap information in tab-delimited format. + - `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. + - `*.abacas.tab`: Feature file. + - `*.unused_contigs.out`: Information on contigs that have mapping information but could not be used in the ordering. +- `assembly/<assembler>/abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. **NB:** The value of `<assembler>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). @@ -836,11 +835,11 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `assembly/<assembler>/plasmidid/<sample>/` - * `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. - * `*_final_results.tab`: Summary file with reference coverage stats and contigs. - * `images/<sample>_<reference>.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. - * `logs/`: Log files. +- `assembly/<assembler>/plasmidid/<sample>/` + - `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. + - `*_final_results.tab`: Summary file with reference coverage stats and contigs. + - `images/<sample>_<reference>.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. + - `logs/`: Log files. **NB:** The value of `<assembler>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). @@ -853,14 +852,14 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `assembly/<assembler>/quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. +- `assembly/<assembler>/quast/` + - `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. **NB:** The value of `<assembler>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades').
-[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the _de novo_ assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. ![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) @@ -871,11 +870,11 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
Output files -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. - * `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and *de novo* assembly related metrics. The same metrics will also be added to the top of the MultiQC report. +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + - `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and _de novo_ assembly related metrics. The same metrics will also be added to the top of the MultiQC report.
@@ -894,14 +893,14 @@ An example MultiQC report generated from a full-sized dataset can be viewed on t
Output files -* `genome/` - * `bowtie2/`: Bowtie 2 index for viral genome. - * `blast_db/`: BLAST database for viral genome. - * `kraken2_db/`: Kraken 2 database for host genome. - * `snpeff_db/`: SnpEff database for viral genome. - * `snpeff.config`: SnpEff config file for viral genome. - * Unzipped genome fasta file for viral genome - * Unzipped genome annotation GFF file for viral genome +- `genome/` + - `bowtie2/`: Bowtie 2 index for viral genome. + - `blast_db/`: BLAST database for viral genome. + - `kraken2_db/`: Kraken 2 database for host genome. + - `snpeff_db/`: SnpEff database for viral genome. + - `snpeff.config`: SnpEff config file for viral genome. + - Unzipped genome fasta file for viral genome + - Unzipped genome annotation GFF file for viral genome
@@ -912,10 +911,10 @@ A number of genome-specific files are generated by the pipeline because they are
Output files -* `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
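For context, the Nextflow-generated reports listed above come from Nextflow's built-in `report`, `timeline`, `trace` and `dag` configuration scopes. A minimal sketch of how such reports are typically enabled in a custom config follows; the file names are assumptions chosen to match the outputs above, not settings taken from this diff:

```nextflow
// Illustrative sketch only: enable Nextflow's built-in run reports.
report {
    enabled = true
    file    = 'pipeline_info/execution_report.html'
}
timeline {
    enabled = true
    file    = 'pipeline_info/execution_timeline.html'
}
trace {
    enabled = true
    file    = 'pipeline_info/execution_trace.txt'
}
dag {
    enabled = true
    file    = 'pipeline_info/pipeline_dag.svg'
}
```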
diff --git a/docs/usage.md b/docs/usage.md index 0540585f..b8416d20 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -25,11 +25,11 @@ SAMPLE_1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz SAMPLE_2,AEG588A2_S4_L003_R1_001.fastq.gz, ``` -| Column | Description | -|-----------|-----------------------------------------------------------------------------------------------------------------------------| -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | > **NB:** Dashes (`-`) and spaces in sample names are automatically converted to underscores (`_`) to avoid downstream issues in the pipeline. @@ -51,10 +51,10 @@ sample,barcode 70N209581,4 ``` -| Column | Description | -|-----------|-----------------------------------------------------------------------------------------------------------------------------| -| `sample` | Custom sample name, one per barcode. | -| `barcode` | Barcode identifier attributed to that sample during multiplexing. Must be an integer. | +| Column | Description | +| --------- | ------------------------------------------------------------------------------------- | +| `sample` | Custom sample name, one per barcode. | +| `barcode` | Barcode identifier attributed to that sample during multiplexing. Must be an integer. | > **NB:** Dashes (`-`) and spaces in sample names are automatically converted to underscores (`_`) to avoid downstream issues in the pipeline. @@ -185,9 +185,9 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: ```console -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files +<OUTDIR> # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -226,25 +226,25 @@ They are loaded in sequence, so later profiles can overwrite earlier profiles. If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
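For orientation, a typical container-based launch of this pipeline might look like the sketch below; the sample sheet name and output directory are illustrative placeholders, not values taken from this diff:

```console
nextflow run nf-core/viralrecon \
    -profile docker \
    --input samplesheet.csv \
    --platform illumina \
    --protocol amplicon \
    --genome 'MN908947.3' \
    --outdir results
```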
-* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) -* `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -* `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) -* `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -* `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -* `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. 
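As a sketch of how `-resume` is used in practice (the run name `golden_majorana` is hypothetical, and the other arguments are placeholders):

```console
# Re-run with the same arguments plus -resume to reuse cached task results
nextflow run nf-core/viralrecon -profile docker --input samplesheet.csv --outdir results -resume

# Resume a specific earlier run by name (list run names with `nextflow log`)
nextflow run nf-core/viralrecon -profile docker --input samplesheet.csv --outdir results -resume golden_majorana
```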
@@ -261,11 +261,11 @@ Whilst the default requirements set within the pipeline will hopefully work for For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: ```console -[62/149eb0] NOTE: Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' +[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) +Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' Caused by: - Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) + Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) Command executed: STAR \ @@ -290,7 +290,7 @@ Tip: you can replicate the issue by changing to the process work dir and enterin ``` To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so based on the search results the file we want is `modules/nf-core/software/star/align/main.nf`. +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. @@ -299,14 +299,15 @@ The custom config below can then be provided to the pipeline via the [`-c`](#-c) ```nextflow process { - withName: STAR_ALIGN { + withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { memory = 100.GB } } ``` -> **NB:** We specify just the process name i.e. `STAR_ALIGN` in the config file and not the full task name string that is printed to screen in the error message or on the terminal whilst the pipeline is running i.e. `RNASEQ:ALIGN_STAR:STAR_ALIGN`. -> You may get a warning suggesting that the process selector isn't recognised but you can ignore that if the process name has been specified correctly. This is something that needs to be fixed upstream in core Nextflow. +> **NB:** We specify the full process name i.e. 
`NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. +> +> If you get a warning suggesting that the process selector isn't recognised, check that the process name has been specified correctly. ### Updating containers @@ -320,35 +321,35 @@ For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a 2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) 3. Create the custom config accordingly: - * For Docker: + - For Docker: - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.1.17--pyhdfd78af_1' - } - } - ``` + ```nextflow + process { + withName: PANGOLIN { + container = 'quay.io/biocontainers/pangolin:3.1.17--pyhdfd78af_1' + } + } + ``` - * For Singularity: + - For Singularity: - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.1.17--pyhdfd78af_1' - } - } - ``` + ```nextflow + process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.1.17--pyhdfd78af_1' + } + } + ``` - * For Conda: + - For Conda: - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.1.17' - } - } - ``` + ```nextflow + process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.1.17' + } + } + ``` #### Nextclade @@ -358,43 +359,43 @@ You can use a similar approach to update the version of Nextclade used by the pi 2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/nextclade?tag=latest&tab=tags) 3. Create the custom config accordingly: - * For Docker: + - For Docker: - ```nextflow - process { - withName: 'NEXTCLADE_DATASETGET|NEXTCLADE_RUN' { - container = 'quay.io/biocontainers/nextclade:1.10.1--h9ee0642_0' - } - } - ``` + ```nextflow + process { + withName: 'NEXTCLADE_DATASETGET|NEXTCLADE_RUN' { + container = 'quay.io/biocontainers/nextclade:1.10.1--h9ee0642_0' + } + } + ``` - * For Singularity: + - For Singularity: - ```nextflow - process { - withName: 'NEXTCLADE_DATASETGET|NEXTCLADE_RUN' { - container = 'https://depot.galaxyproject.org/singularity/nextclade:1.10.1--h9ee0642_0' - } - } - ``` + ```nextflow + process { + withName: 'NEXTCLADE_DATASETGET|NEXTCLADE_RUN' { + container = 'https://depot.galaxyproject.org/singularity/nextclade:1.10.1--h9ee0642_0' + } + } + ``` - * For Conda: + - For Conda: - ```nextflow - process { - withName: 'NEXTCLADE_DATASETGET|NEXTCLADE_RUN' { - conda = 'bioconda::nextclade=1.10.1' - } - } - ``` + ```nextflow + process { + withName: 'NEXTCLADE_DATASETGET|NEXTCLADE_RUN' { + conda = 'bioconda::nextclade=1.10.1' + } + } + ``` ##### Nextclade datasets -A [`nextclade dataset`](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html#nextclade-datasets) feature was introduced in [Nextclade CLI v1.3.0](https://github.com/nextstrain/nextclade/releases/tag/1.3.0) that fetches input genome files such as reference sequences and trees from a central dataset repository.
We have uploaded Nextclade dataset [v2022-01-18](https://github.com/nextstrain/nextclade_data/releases/tag/2022-01-24--21-27-29--UTC) to [nf-core/test-datasets](https://github.com/nf-core/test-datasets/blob/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz?raw=true), and for reproducibility, this will be used by default if you specify `--genome 'MN908947.3'` when running the pipeline. However, there are a number of ways you can use a more recent version of the dataset: +A [`nextclade dataset`](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html#nextclade-datasets) feature was introduced in [Nextclade CLI v1.3.0](https://github.com/nextstrain/nextclade/releases/tag/1.3.0) that fetches input genome files such as reference sequences and trees from a central dataset repository. We have uploaded Nextclade dataset [v2022-06-14](https://github.com/nextstrain/nextclade_data/releases/tag/2022-06-16--16-03-24--UTC) to [nf-core/test-datasets](https://github.com/nf-core/test-datasets/blob/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz?raw=true), and for reproducibility, this will be used by default if you specify `--genome 'MN908947.3'` when running the pipeline. However, there are a number of ways you can use a more recent version of the dataset: -* Supply your own by setting: `--nextclade_dataset <path_to_dataset>` -* Let the pipeline create and use the latest version by setting: `--nextclade_dataset false --nextclade_dataset_tag false` -* Let the pipeline create and use a specific, tagged version by setting: `--nextclade_dataset false --nextclade_dataset_tag <tag>` +- Supply your own by setting: `--nextclade_dataset <path_to_dataset>` +- Let the pipeline create and use the latest version by setting: `--nextclade_dataset false --nextclade_dataset_tag false` +- Let the pipeline create and use a specific, tagged version by setting: `--nextclade_dataset false --nextclade_dataset_tag <tag>` The Nextclade dataset releases can be found on their [Github page](https://github.com/nextstrain/nextclade_data/releases).
Use the tag specified for each release e.g `2022-01-18T12:00:00Z` in the example below: diff --git a/modules.json b/modules.json index 656f61c6..97603779 100644 --- a/modules.json +++ b/modules.json @@ -7,31 +7,34 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "artic/guppyplex": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde" }, "artic/minion": { - "git_sha": "cab399507bea60d90de6d7b296163210c371b693" + "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde" }, "bandage/image": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "bcftools/consensus": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/filter": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/mpileup": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/norm": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/query": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "bcftools/sort": { + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/stats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bedtools/getfasta": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -49,28 +52,28 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "bowtie2/align": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" }, "bowtie2/build": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "cat/fastq": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "custom/getchromsizes": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" }, "fastp": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74" }, "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, "gunzip": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" }, "ivar/consensus": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -82,31 +85,31 @@ "git_sha": "cab399507bea60d90de6d7b296163210c371b693" }, "kraken2/kraken2": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" }, "minia": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "mosdepth": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "72a31b76eb1b58879e0d91fb1d992e0118693098" }, "nanoplot": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "nextclade/datasetget": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "nextclade/run": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": 
"682f789f93070bd047868300dd018faf3d434e7c" }, "pangolin": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "picard/collectmultiplemetrics": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "picard/markduplicates": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "plasmidid": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -115,47 +118,44 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "quast": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "samtools/flagstat": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" }, "samtools/idxstats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" }, "samtools/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "samtools/mpileup": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/sort": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/stats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" }, "samtools/view": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" }, "spades": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "tabix/bgzip": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" }, "tabix/tabix": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484" }, "unicycler": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "untar": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" }, "vcflib/vcfuniq": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" } } } -} \ No newline at end of file +} diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf index 4173cdb6..5e5ed81b 100644 --- a/modules/local/filter_blastn.nf +++ b/modules/local/filter_blastn.nf @@ -4,8 +4,8 @@ process FILTER_BLASTN { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(hits) diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index 825d87ff..cd220b24 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -3,12 +3,13 @@ process IVAR_VARIANTS_TO_VCF { conda (params.enable_conda ? 
"conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' : - 'quay.io/biocontainers/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' : + 'quay.io/biocontainers/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' }" input: tuple val(meta), path(tsv) - path header + path fasta + path header output: tuple val(meta), path("*.vcf"), emit: vcf @@ -25,13 +26,11 @@ process IVAR_VARIANTS_TO_VCF { """ ivar_variants_to_vcf.py \\ $tsv \\ - unsorted.txt \\ + ${prefix}.vcf \\ + --fasta $fasta \\ $args \\ > ${prefix}.variant_counts.log - ## Order vcf by coordinates - cat unsorted.txt | grep "^#" > ${prefix}.vcf; cat unsorted.txt | grep -v "^#" | sort -k1,1d -k2,2n >> ${prefix}.vcf - cat $header ${prefix}.variant_counts.log > ${prefix}.variant_counts_mqc.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf index 6591d294..59a031c2 100644 --- a/modules/local/multiqc_illumina.nf +++ b/modules/local/multiqc_illumina.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path 'multiqc_config.yaml' diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf index cbd2b19d..e23db35c 100644 --- a/modules/local/multiqc_nanopore.nf +++ b/modules/local/multiqc_nanopore.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path 'multiqc_config.yaml' diff --git a/modules/local/rename_fasta_header.nf b/modules/local/rename_fasta_header.nf index 7d9c9901..36810983 100644 --- a/modules/local/rename_fasta_header.nf +++ b/modules/local/rename_fasta_header.nf @@ -3,8 +3,8 @@ process RENAME_FASTA_HEADER { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/modules/artic/guppyplex/main.nf b/modules/nf-core/modules/artic/guppyplex/main.nf index 8e6b2879..2fd518e0 100644 --- a/modules/nf-core/modules/artic/guppyplex/main.nf +++ b/modules/nf-core/modules/artic/guppyplex/main.nf @@ -2,10 +2,10 @@ process ARTIC_GUPPYPLEX { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::artic=1.2.1" : null) + conda (params.enable_conda ? "bioconda::artic=1.2.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/artic:1.2.1--py_0' : - 'quay.io/biocontainers/artic:1.2.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' : + 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }" input: tuple val(meta), path(fastq_dir) @@ -20,6 +20,7 @@ process ARTIC_GUPPYPLEX { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. """ artic \\ guppyplex \\ @@ -30,7 +31,7 @@ process ARTIC_GUPPYPLEX { pigz -p $task.cpus *.fastq cat <<-END_VERSIONS > versions.yml "${task.process}": - artic: \$(artic --version 2>&1 | sed 's/^.*artic //; s/ .*\$//') + artic: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/modules/artic/minion/main.nf b/modules/nf-core/modules/artic/minion/main.nf index 22a6fd87..1629d433 100644 --- a/modules/nf-core/modules/artic/minion/main.nf +++ b/modules/nf-core/modules/artic/minion/main.nf @@ -2,10 +2,10 @@ process ARTIC_MINION { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::artic=1.2.1" : null) + conda (params.enable_conda ? "bioconda::artic=1.2.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/artic:1.2.1--py_0' : - 'quay.io/biocontainers/artic:1.2.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' : + 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }" input: tuple val(meta), path(fastq) @@ -48,6 +48,7 @@ process ARTIC_MINION { model = medaka_model_file ? "--medaka-model ./$medaka_model_file" : "--medaka-model $medaka_model_string" } def hd5_plugin_path = task.ext.hd5_plugin_path ? "export HDF5_PLUGIN_PATH=" + task.ext.hd5_plugin_path : "export HDF5_PLUGIN_PATH=/usr/local/lib/python3.6/site-packages/ont_fast5_api/vbz_plugin" + def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. 
""" $hd5_plugin_path @@ -66,7 +67,7 @@ process ARTIC_MINION { cat <<-END_VERSIONS > versions.yml "${task.process}": - artic: \$(artic --version 2>&1 | sed 's/^.*artic //; s/ .*\$//') + artic: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/modules/bcftools/consensus/main.nf b/modules/nf-core/modules/bcftools/consensus/main.nf index a0c436e2..e28dc7f4 100644 --- a/modules/nf-core/modules/bcftools/consensus/main.nf +++ b/modules/nf-core/modules/bcftools/consensus/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_CONSENSUS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf), path(tbi), path(fasta) diff --git a/modules/nf-core/modules/bcftools/filter/main.nf b/modules/nf-core/modules/bcftools/filter/main.nf index 82961e32..ef99eda2 100644 --- a/modules/nf-core/modules/bcftools/filter/main.nf +++ b/modules/nf-core/modules/bcftools/filter/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/modules/bcftools/mpileup/main.nf b/modules/nf-core/modules/bcftools/mpileup/main.nf index 676eae7a..b7795bfc 100644 --- a/modules/nf-core/modules/bcftools/mpileup/main.nf +++ b/modules/nf-core/modules/bcftools/mpileup/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_MPILEUP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/bcftools/norm/main.nf b/modules/nf-core/modules/bcftools/norm/main.nf index cd681f21..96f306bc 100644 --- a/modules/nf-core/modules/bcftools/norm/main.nf +++ b/modules/nf-core/modules/bcftools/norm/main.nf @@ -2,13 +2,13 @@ process BCFTOOLS_NORM { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: - tuple val(meta), path(vcf) + tuple val(meta), path(vcf), path(tbi) path(fasta) output: @@ -34,4 +34,15 @@ process BCFTOOLS_NORM { bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bcftools/norm/meta.yml b/modules/nf-core/modules/bcftools/norm/meta.yml index ce4aee85..2b3c8eae 100644 --- a/modules/nf-core/modules/bcftools/norm/meta.yml +++ b/modules/nf-core/modules/bcftools/norm/meta.yml @@ -24,6 +24,12 @@ input: description: | The vcf file to be normalized e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" - fasta: type: file description: FASTA reference file @@ -37,7 +43,7 @@ output: - vcf: type: file description: VCF normalized output file - pattern: "*.{vcf.gz}" + pattern: "*.vcf.gz" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/bcftools/query/main.nf b/modules/nf-core/modules/bcftools/query/main.nf index 8921abdd..5de34a9e 100644 --- a/modules/nf-core/modules/bcftools/query/main.nf +++ b/modules/nf-core/modules/bcftools/query/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_QUERY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/modules/bcftools/sort/main.nf b/modules/nf-core/modules/bcftools/sort/main.nf new file mode 100644 index 00000000..9552b57c --- /dev/null +++ b/modules/nf-core/modules/bcftools/sort/main.nf @@ -0,0 +1,35 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools \\ + sort \\ + --output ${prefix}.vcf.gz \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bcftools/sort/meta.yml b/modules/nf-core/modules/bcftools/sort/meta.yml new file mode 100644 index 00000000..0c244a48 --- /dev/null +++ b/modules/nf-core/modules/bcftools/sort/meta.yml @@ -0,0 +1,43 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" + +authors: + - "@Gwennid" diff --git a/modules/nf-core/modules/bcftools/stats/main.nf b/modules/nf-core/modules/bcftools/stats/main.nf index 7e150d1f..1e0f3a47 100644 --- a/modules/nf-core/modules/bcftools/stats/main.nf +++ b/modules/nf-core/modules/bcftools/stats/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_STATS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf index 7e8a9659..c74e376f 100644 --- a/modules/nf-core/modules/bowtie2/align/main.nf +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -1,77 +1,71 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label 'process_high' + label "process_high" - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : - 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) + container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? + "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : + "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" input: tuple val(meta), path(reads) path index val save_unaligned + val sort_bam output: - tuple val(meta), path('*.bam') , emit: bam - tuple val(meta), path('*.log') , emit: log - tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -U $reads \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" } else { - def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then - mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz - fi - if [ -f ${prefix}.unmapped.fastq.2.gz ]; then - mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz - fi + def samtools_command = sort_bam ? 
'sort' : 'view' - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - } + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml index f80421ec..42ba0f96 100644 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -2,7 +2,9 @@ name: bowtie2_align description: Align reads to a reference genome using bowtie2 keywords: - align + - map - fasta + - fastq - genome - reference tools: @@ -29,6 +31,15 @@ input: type: file description: Bowtie2 genome index files pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" output: - bam: type: file diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/modules/cat/fastq/main.nf index bf0877c3..b6854895 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/modules/cat/fastq/main.nf @@ -4,8 +4,8 @@ process CAT_FASTQ { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 327d5100..12293efc 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_low' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 
"bioconda::multiqc=1.12" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/modules/custom/getchromsizes/main.nf index 39da7d34..0eabf3a4 100644 --- a/modules/nf-core/modules/custom/getchromsizes/main.nf +++ b/modules/nf-core/modules/custom/getchromsizes/main.nf @@ -2,10 +2,10 @@ process CUSTOM_GETCHROMSIZES { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: path fasta diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/modules/fastp/main.nf index d8218350..120392c5 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/modules/fastp/main.nf @@ -13,7 +13,7 @@ process FASTP { val save_merged output: - tuple val(meta), path('*.trim.fastq.gz') , emit: reads + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads tuple val(meta), path('*.json') , emit: json tuple val(meta), path('*.html') , emit: html tuple val(meta), path('*.log') , emit: log @@ -31,10 +31,10 @@ process FASTP { if (meta.single_end) { def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz fastp \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.trim.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ @@ -50,13 +50,13 @@ process FASTP { def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz fastp \\ --in1 ${prefix}_1.fastq.gz \\ --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.trim.fastq.gz \\ - --out2 ${prefix}_2.trim.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $fail_fastq \\ diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/modules/fastp/meta.yml index f53bb09f..2bd2b1a9 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/modules/fastp/meta.yml @@ -22,6 +22,12 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
+ - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` output: - meta: @@ -32,7 +38,7 @@ output: - reads: type: file description: The trimmed/modified/unmerged fastq reads - pattern: "*trim.fastq.gz" + pattern: "*fastp.fastq.gz" - json: type: file description: Results in JSON format diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index ed6b8c50..05730368 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf index 9d4b0666..70367049 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/modules/gunzip/main.nf @@ -4,8 +4,8 @@ process GUNZIP { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(archive) @@ -31,4 +31,14 @@ process GUNZIP { gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/modules/kraken2/kraken2/main.nf index 3ec5df52..d4000233 100644 --- a/modules/nf-core/modules/kraken2/kraken2/main.nf +++ b/modules/nf-core/modules/kraken2/kraken2/main.nf @@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 { input: tuple val(meta), path(reads) path db + val save_output_fastqs + val save_reads_assignment output: - tuple val(meta), path('*classified*') , emit: classified - tuple val(meta), path('*unclassified*'), emit: unclassified - tuple val(meta), path('*report.txt') , emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 { def paired = meta.single_end ? "" : "--paired" def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified_command = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_command = save_output_fastqs ?
"--unclassified-out ${unclassified}" : "" + def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "" + def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : "" + """ kraken2 \\ --db $db \\ --threads $task.cpus \\ - --unclassified-out $unclassified \\ - --classified-out $classified \\ --report ${prefix}.kraken2.report.txt \\ --gzip-compressed \\ + $unclassified_command \\ + $classified_command \\ + $readclassification_command \\ $paired \\ $args \\ $reads - pigz -p $task.cpus *.fastq + $compress_reads_command cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/kraken2/kraken2/meta.yml b/modules/nf-core/modules/kraken2/kraken2/meta.yml index 9d6a3855..7129fe3a 100644 --- a/modules/nf-core/modules/kraken2/kraken2/meta.yml +++ b/modules/nf-core/modules/kraken2/kraken2/meta.yml @@ -27,25 +27,40 @@ input: - db: type: directory description: Kraken2 database + - save_output_fastqs: + type: boolean + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - save_reads_assignment: + type: boolean + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - classified: + - classified_reads_fastq: type: file description: | - Reads classified to belong to any of the taxa + Reads classified as belonging to any of the taxa on the Kraken2 database. pattern: "*{fastq.gz}" - - unclassified: + - unclassified_reads_fastq: type: file description: | - Reads not classified to belong to any of the taxa + Reads not classified to any of the taxa on the Kraken2 database. 
pattern: "*{fastq.gz}" - - txt: + - classified_reads_assignment: + type: file + description: | + Kraken2 output file indicating the taxonomic assignment of + each input read + - report: type: file description: | Kraken2 report containing stats about classified diff --git a/modules/nf-core/modules/mosdepth/main.nf b/modules/nf-core/modules/mosdepth/main.nf index ff91e06f..d7e3c929 100644 --- a/modules/nf-core/modules/mosdepth/main.nf +++ b/modules/nf-core/modules/mosdepth/main.nf @@ -10,18 +10,22 @@ process MOSDEPTH { input: tuple val(meta), path(bam), path(bai) path bed - val window_size + path fasta output: - tuple val(meta), path('*.global.dist.txt') , emit: global_txt - tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true - tuple val(meta), path('*.summary.txt') , emit: summary_txt - tuple val(meta), path('*.per-base.d4') , emit: d4 , optional:true - tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true - tuple val(meta), path('*.per-base.bed.gz.csi'), emit: per_base_csi, optional:true - tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , optional:true - tuple val(meta), path('*.regions.bed.gz.csi') , emit: regions_csi , optional:true - path "versions.yml" , emit: versions + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,19 +33,24 @@ process MOSDEPTH { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if (window_size) { - interval = "--by ${window_size}" - } else if ( bed ) { - interval = "--by ${bed}" - } else { - interval = "" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + exit 1, "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" } + if (!bed && args.contains("--thresholds")) { + exit 1, "'--thresholds' can only be specified in conjunction with '--by'" + } + """ mosdepth \\ + --threads $task.cpus \\ $interval \\ + $reference \\ $args \\ $prefix \\ $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') @@ -59,6 +68,10 @@ process MOSDEPTH { touch ${prefix}.per-base.bed.gz.csi touch ${prefix}.regions.bed.gz touch ${prefix}.regions.bed.gz.csi + touch ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + touch ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/mosdepth/meta.yml b/modules/nf-core/modules/mosdepth/meta.yml index 636e966b..d1e33447 100644 --- a/modules/nf-core/modules/mosdepth/meta.yml +++ b/modules/nf-core/modules/mosdepth/meta.yml @@ -30,10 +30,10 @@ input: type: file description: BED file with intersected intervals pattern: "*.{bed}" - - window_size: - type: integer - description: Window size - pattern: "[0-9]+" + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" output: - meta: type: map @@ -60,6 +60,10 @@ output: type: file description: Index file for BED file with per-base coverage pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" - regions_bed: type: file description: BED file with per-region coverage @@ -68,6 +72,22 @@ output: type: file description: Index file for BED file with per-region coverage pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + type: file + description: BED file with the number of bases in each region that are covered at or above each threshold + pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" - versions: type: file description: File containing software versions @@ -76,3 +96,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/modules/nanoplot/main.nf b/modules/nf-core/modules/nanoplot/main.nf index 083e2374..83c0e2ec 100644 --- a/modules/nf-core/modules/nanoplot/main.nf +++ b/modules/nf-core/modules/nanoplot/main.nf @@ -2,10 +2,10 @@ process NANOPLOT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::nanoplot=1.39.0' : null) + conda (params.enable_conda ? 'bioconda::nanoplot=1.40.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/nanoplot:1.39.0--pyhdfd78af_0' : - 'quay.io/biocontainers/nanoplot:1.39.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/nanoplot:1.40.0--pyhdfd78af_0' : + 'quay.io/biocontainers/nanoplot:1.40.0--pyhdfd78af_0' }" input: tuple val(meta), path(ontfile) diff --git a/modules/nf-core/modules/nextclade/datasetget/main.nf b/modules/nf-core/modules/nextclade/datasetget/main.nf index 4dd82ee3..a9f52c84 100644 --- a/modules/nf-core/modules/nextclade/datasetget/main.nf +++ b/modules/nf-core/modules/nextclade/datasetget/main.nf @@ -2,10 +2,10 @@ process NEXTCLADE_DATASETGET { tag "$dataset" label 'process_low' - conda (params.enable_conda ? "bioconda::nextclade=1.10.2" : null) + conda (params.enable_conda ? "bioconda::nextclade=2.2.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nextclade:1.10.2--h9ee0642_0' : - 'quay.io/biocontainers/nextclade:1.10.2--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:2.2.0--h9ee0642_0' : + 'quay.io/biocontainers/nextclade:2.2.0--h9ee0642_0' }" input: val dataset @@ -36,7 +36,7 @@ process NEXTCLADE_DATASETGET { cat <<-END_VERSIONS > versions.yml "${task.process}": - nextclade: \$(nextclade --version 2>&1) + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/modules/nextclade/run/main.nf b/modules/nf-core/modules/nextclade/run/main.nf index 4d4bdb88..22f72781 100644 --- a/modules/nf-core/modules/nextclade/run/main.nf +++ b/modules/nf-core/modules/nextclade/run/main.nf @@ -2,21 +2,26 @@ process NEXTCLADE_RUN { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::nextclade=1.10.2" : null) + conda (params.enable_conda ? "bioconda::nextclade=2.2.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/nextclade:1.10.2--h9ee0642_0' : - 'quay.io/biocontainers/nextclade:1.10.2--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:2.2.0--h9ee0642_0' : + 'quay.io/biocontainers/nextclade:2.2.0--h9ee0642_0' }" input: tuple val(meta), path(fasta) path dataset output: - tuple val(meta), path("${prefix}.csv") , emit: csv - tuple val(meta), path("${prefix}.tsv") , emit: tsv - tuple val(meta), path("${prefix}.json") , emit: json - tuple val(meta), path("${prefix}.tree.json"), emit: json_tree - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.csv") , optional:true, emit: csv + tuple val(meta), path("${prefix}.errors.csv") , optional:true, emit: csv_errors + tuple val(meta), path("${prefix}.insertions.csv"), optional:true, emit: csv_insertions + tuple val(meta), path("${prefix}.tsv") , optional:true, emit: tsv + tuple val(meta), path("${prefix}.json") , optional:true, emit: json + tuple val(meta), path("${prefix}.auspice.json") , optional:true, emit: json_auspice + tuple val(meta), path("${prefix}.ndjson") , optional:true, emit: ndjson + tuple val(meta), path("${prefix}.aligned.fasta") , optional:true, emit: fasta_aligned + tuple val(meta), path("*.translation.fasta") , optional:true, emit: fasta_translation + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,17 +34,14 @@ process NEXTCLADE_RUN { run \\ $args \\ --jobs $task.cpus \\ - --input-fasta $fasta \\ --input-dataset $dataset \\ - --output-csv ${prefix}.csv \\ - --output-tsv ${prefix}.tsv \\ - --output-json ${prefix}.json \\ - --output-tree ${prefix}.tree.json \\ - --output-basename ${prefix} + --output-all ./ \\ + --output-basename ${prefix} \\ + $fasta cat <<-END_VERSIONS > versions.yml "${task.process}": - nextclade: \$(nextclade --version 2>&1) + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/modules/pangolin/main.nf b/modules/nf-core/modules/pangolin/main.nf index 5af557ac..6414b5d3 100644 --- a/modules/nf-core/modules/pangolin/main.nf +++ b/modules/nf-core/modules/pangolin/main.nf @@ -2,10 +2,10 @@ process PANGOLIN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::pangolin=3.1.20' : null) + conda (params.enable_conda ? 'bioconda::pangolin=4.1.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pangolin:3.1.20--pyhdfd78af_0' : - 'quay.io/biocontainers/pangolin:3.1.20--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pangolin:4.1.1--pyhdfd78af_0' : + 'quay.io/biocontainers/pangolin:4.1.1--pyhdfd78af_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf index e023ea3c..63f4e872 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf @@ -2,14 +2,15 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.26.10" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" input: tuple val(meta), path(bam) path fasta + path fai output: tuple val(meta), path("*_metrics"), emit: metrics @@ -22,6 +23,7 @@ process PICARD_COLLECTMULTIPLEMETRICS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -33,13 +35,33 @@ process PICARD_COLLECTMULTIPLEMETRICS { -Xmx${avail_mem}g \\ CollectMultipleMetrics \\ $args \\ - INPUT=$bam \\ - OUTPUT=${prefix}.CollectMultipleMetrics \\ - REFERENCE_SEQUENCE=$fasta + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectMultipleMetrics \\ + $reference cat <<-END_VERSIONS > versions.yml "${task.process}": picard: \$(picard CollectMultipleMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectMultipleMetrics.alignment_summary_metrics + touch ${prefix}.CollectMultipleMetrics.insert_size_metrics + touch ${prefix}.CollectMultipleMetrics.quality_distribution.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.read_length_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.insert_size_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.quality_distribution_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectMultipleMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml index 68b5c65e..c11b02cf 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml @@ -28,6 +28,10 @@ input: - fasta: type: file description: Genome fasta file + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. + pattern: "*.fai" output: - meta: type: map diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/modules/picard/markduplicates/main.nf index 5196b6ed..4e559fea 100644 --- a/modules/nf-core/modules/picard/markduplicates/main.nf +++ b/modules/nf-core/modules/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.26.10" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -33,9 +33,22 @@ process PICARD_MARKDUPLICATES { -Xmx${avail_mem}g \\ MarkDuplicates \\ $args \\ - I=$bam \\ - O=${prefix}.bam \\ - M=${prefix}.MarkDuplicates.metrics.txt + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + touch ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/quast/main.nf b/modules/nf-core/modules/quast/main.nf index beb797d4..5585491b 100644 --- a/modules/nf-core/modules/quast/main.nf +++ b/modules/nf-core/modules/quast/main.nf @@ -1,10 +1,10 @@ process QUAST { label 'process_medium' - conda (params.enable_conda ? 'bioconda::quast=5.0.2' : null) + conda (params.enable_conda ? 'bioconda::quast=5.2.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quast:5.0.2--py37pl526hb5aa323_2' : - 'quay.io/biocontainers/quast:5.0.2--py37pl526hb5aa323_2' }" + 'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' : + 'quay.io/biocontainers/quast:5.2.0--py39pl5321h2add14b_1' }" input: path consensus diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf index c267922b..03ec2dcf 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,12 +19,13 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/modules/samtools/idxstats/main.nf index 8a057413..4b245419 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/modules/samtools/idxstats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,11 +19,13 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/modules/samtools/index/main.nf index dfe0234f..e04e63e8 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/modules/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(input) @@ -33,4 +33,16 @@ process SAMTOOLS_INDEX { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/mpileup/main.nf b/modules/nf-core/modules/samtools/mpileup/main.nf deleted file mode 100644 index 77afae60..00000000 --- a/modules/nf-core/modules/samtools/mpileup/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process SAMTOOLS_MPILEUP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - - input: - tuple val(meta), path(bam) - path fasta - - output: - tuple val(meta), path("*.mpileup"), emit: mpileup - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - samtools mpileup \\ - --fasta-ref $fasta \\ - --output ${prefix}.mpileup \\ - $args \\ - $bam - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/mpileup/meta.yml b/modules/nf-core/modules/samtools/mpileup/meta.yml deleted file mode 100644 index c384f5c6..00000000 --- a/modules/nf-core/modules/samtools/mpileup/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: samtools_mpileup -description: BAM -keywords: - - mpileup - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: file - description: FASTA reference file - pattern: "*.{fasta,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - mpileup: - type: file - description: mpileup file - pattern: "*.{mpileup}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@joseespinosa" diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf index 0f2237cc..b4fc1cbe 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/modules/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(bam) @@ -28,4 +28,15 @@ process SAMTOOLS_SORT { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/modules/samtools/stats/main.nf index f6fe3bfe..c913bc5e 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/modules/samtools/stats/main.nf @@ -2,13 +2,13 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(bam), path(bai) path fasta output: @@ -20,14 +20,26 @@ process SAMTOOLS_STATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ stats \\ --threads ${task.cpus-1} \\ ${reference} \\ - ${input} \\ - > ${input}.stats + ${bam} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf index aee21a4e..55194e88 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/nf-core/modules/samtools/view/main.nf @@ -2,13 +2,13 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(input) + tuple val(meta), path(input), path(index) path fasta output: @@ -41,4 +41,16 @@ process SAMTOOLS_VIEW { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.cram + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/view/meta.yml b/modules/nf-core/modules/samtools/view/meta.yml index 5604bfa7..a8b43ecc 100644 --- a/modules/nf-core/modules/samtools/view/meta.yml +++ b/modules/nf-core/modules/samtools/view/meta.yml @@ -25,6 +25,10 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + - index: + type: optional file + description: BAM.BAI/CRAM.CRAI file + pattern: "*.{.bai,.crai}" - fasta: type: optional file description: Reference file the CRAM was created with diff --git a/modules/nf-core/modules/spades/main.nf b/modules/nf-core/modules/spades/main.nf index b7ece6f6..a467fcd7 100644 --- a/modules/nf-core/modules/spades/main.nf +++ b/modules/nf-core/modules/spades/main.nf @@ -2,10 +2,10 @@ process SPADES { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::spades=3.15.3' : null) + conda (params.enable_conda ? 'bioconda::spades=3.15.4' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' : - 'quay.io/biocontainers/spades:3.15.3--h95f258a_0' }" + 'https://depot.galaxyproject.org/singularity/spades:3.15.4--h95f258a_0' : + 'quay.io/biocontainers/spades:3.15.4--h95f258a_0' }" input: tuple val(meta), path(illumina), path(pacbio), path(nanopore) diff --git a/modules/nf-core/modules/tabix/bgzip/main.nf b/modules/nf-core/modules/tabix/bgzip/main.nf index 90940a5d..18e83c84 100644 --- a/modules/nf-core/modules/tabix/bgzip/main.nf +++ b/modules/nf-core/modules/tabix/bgzip/main.nf @@ -11,17 +11,20 @@ process TABIX_BGZIP { tuple val(meta), path(input) output: - tuple val(meta), path("*.gz"), emit: gz - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}*"), emit: output + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = input.toString().endsWith(".gz") + command1 = in_bgzip ? '-d' : '-c' + command2 = in_bgzip ? 
'' : " > ${prefix}.${input.getExtension()}.gz" """ - bgzip -c $args $input > ${prefix}.${input.getExtension()}.gz + bgzip $command1 $args -@${task.cpus} $input $command2 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/tabix/bgzip/meta.yml b/modules/nf-core/modules/tabix/bgzip/meta.yml index 207427e4..50070175 100644 --- a/modules/nf-core/modules/tabix/bgzip/meta.yml +++ b/modules/nf-core/modules/tabix/bgzip/meta.yml @@ -1,13 +1,14 @@ name: tabix_bgzip -description: Compresses files +description: Compresses/decompresses files keywords: - compress + - decompress - bgzip - tabix tools: - bgzip: description: | - Bgzip compresses files in a similar manner to, and compatible with, gzip. + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. homepage: https://www.htslib.org/doc/tabix.html documentation: http://www.htslib.org/doc/bgzip.html doi: 10.1093/bioinformatics/btp352 @@ -18,19 +19,19 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - file: + - input: type: file - description: text file + description: file to compress or to decompress output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - file: + - output: type: file - description: Output compressed file - pattern: "*.{gz}" + description: Output compressed/decompressed file + pattern: "*." - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/tabix/tabix/main.nf b/modules/nf-core/modules/tabix/tabix/main.nf index 5f516261..e155e468 100644 --- a/modules/nf-core/modules/tabix/tabix/main.nf +++ b/modules/nf-core/modules/tabix/tabix/main.nf @@ -11,7 +11,8 @@ process TABIX_TABIX { tuple val(meta), path(tab) output: - tuple val(meta), path("*.tbi"), emit: tbi + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi path "versions.yml" , emit: versions when: @@ -27,4 +28,15 @@ process TABIX_TABIX { tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${tab}.tbi + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/tabix/tabix/meta.yml b/modules/nf-core/modules/tabix/tabix/meta.yml index 89478abe..fcc6e524 100644 --- a/modules/nf-core/modules/tabix/tabix/meta.yml +++ b/modules/nf-core/modules/tabix/tabix/meta.yml @@ -31,6 +31,10 @@ output: type: file description: tabix index file pattern: "*.{tbi}" + - csi: + type: file + description: coordinate sorted index file + pattern: "*.{csi}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index 01205e60..29ab10a5 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -4,29 +4,46 @@ process UNTAR { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: - path archive + tuple val(meta), path(archive) output: - path "$untar" , emit: untar - path "versions.yml", emit: versions + tuple val(meta), path("$untar"), emit: untar + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' + untar = archive.toString() - '.tar.gz' + """ + mkdir output + tar \\ + -C output --strip-components 1 \\ -xzvf \\ $args \\ $archive \\ - $args2 \\ + $args2 + + mv output ${untar} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + untar = archive.toString() - '.tar.gz' + """ + touch $untar cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml index e877a97c..d426919b 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/modules/untar/meta.yml @@ -10,11 +10,21 @@ tools: documentation: https://www.gnu.org/software/tar/manual/ licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - archive: type: file description: File to be untar pattern: "*.{tar}.{gz}" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - untar: type: file description: diff --git a/modules/nf-core/modules/vcflib/vcfuniq/main.nf b/modules/nf-core/modules/vcflib/vcfuniq/main.nf index a01e8485..707f074b 100644 --- a/modules/nf-core/modules/vcflib/vcfuniq/main.nf +++ b/modules/nf-core/modules/vcflib/vcfuniq/main.nf @@ -1,13 +1,12 @@ -def VERSION = '1.0.2' // Version information not provided by tool on CLI - process VCFLIB_VCFUNIQ { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::vcflib=1.0.2" : null) + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::vcflib=1.0.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vcflib:1.0.2--h3198e80_5': - 'quay.io/biocontainers/vcflib:1.0.2--h3198e80_5' }" + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': + 'quay.io/biocontainers/vcflib:1.0.3--hecb563c_1' }" input: tuple val(meta), path(vcf), path(tbi) @@ -22,6 +21,7 @@ process VCFLIB_VCFUNIQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
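For reference, the UNTAR refactor above changes the module interface from a bare path to a [ meta, path ] tuple on both input and output, and the subworkflow updates further below all adapt to it in the same way. A minimal sketch of that calling pattern, using illustrative placeholder names (params.some_db, ch_db) rather than anything from this diff:

    if (params.some_db.endsWith('.tar.gz')) {
        // Wrap the bare path with an empty meta map; reference data carries
        // no per-sample metadata, so [:] keeps the interface uniform.
        UNTAR ( [ [:], params.some_db ] )
        // Keep only the path element of the emitted [ meta, dir ] tuple.
        ch_db = UNTAR.out.untar.map { it[1] }
    } else {
        ch_db = file(params.some_db)
    }

Passing the empty map keeps channel shapes consistent with sample-level modules while still letting downstream code discard the meta element where it is not needed.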
""" vcfuniq \\ $vcf \\ diff --git a/nextflow.config b/nextflow.config index 6bceabfd..45c356a8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -225,7 +225,7 @@ trace { } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -235,7 +235,7 @@ manifest { description = 'Assembly and intrahost/low-frequency variant calling for viral samples' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.4.1' + version = '2.5' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index c613bed0..1e8b7d38 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,9 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "outdir" - ], + "required": ["outdir"], "properties": { "input": { "type": "string", @@ -28,19 +26,13 @@ "type": "string", "fa_icon": "fas fa-hdd", "description": "NGS platform used to sequence the samples.", - "enum": [ - "illumina", - "nanopore" - ] + "enum": ["illumina", "nanopore"] }, "protocol": { "type": "string", "description": "Specifies the type of protocol used for sequencing.", "fa_icon": "fas fa-vials", - "enum": [ - "metagenomic", - "amplicon" - ] + "enum": ["metagenomic", "amplicon"] }, "outdir": { "type": "string", @@ -188,20 +180,14 @@ "default": "nanopolish", "description": "Variant caller used when running artic minion (default: 'nanopolish').", "fa_icon": "fas fa-phone-volume", - "enum": [ - "nanopolish", - "medaka" - ] + "enum": ["nanopolish", "medaka"] }, "artic_minion_aligner": { "type": "string", "default": "minimap2", "description": "Aligner used when running artic minion (default: 'minimap2').", "fa_icon": "fas fa-map-signs", - "enum": [ - "minimap2", - "bwa" - ] + "enum": ["minimap2", "bwa"] }, "artic_scheme": { "type": "string", @@ -394,20 +380,14 @@ "type": "string", "fa_icon": "fas fa-phone-volume", "description": "Specify which variant calling algorithm you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic').", - "enum": [ - "ivar", - "bcftools" - ] + "enum": ["ivar", "bcftools"] }, "consensus_caller": { "type": "string", "default": "bcftools", "fa_icon": "fas fa-phone-volume", "description": "Specify which consensus calling algorithm you would like to use. Available options are 'bcftools' and 'ivar' (default: 'bcftools').", - "enum": [ - "ivar", - "bcftools" - ] + "enum": ["ivar", "bcftools"] }, "min_mapped_reads": { "type": "integer", @@ -564,23 +544,16 @@ "help": { "type": "boolean", "description": "Display help text.", - "hidden": true, - "fa_icon": "fas fa-question-circle" + "fa_icon": "fas fa-question-circle", + "hidden": true }, "publish_dir_mode": { "type": "string", "default": "copy", "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files to the location specified by the `--outdir` parameter. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. 
This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -753,4 +726,4 @@ "$ref": "#/definitions/institutional_config_options" } ] -} \ No newline at end of file +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 4762335f..9d83d9f1 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -21,7 +21,7 @@ workflow INPUT_CHECK { .out .csv .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } + .map { create_fastq_channel(it) } .set { sample_info } } else if (platform == 'nanopore') { SAMPLESHEET_CHECK @@ -38,22 +38,24 @@ workflow INPUT_CHECK { } // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channels(LinkedHashMap row) { +def create_fastq_channel(LinkedHashMap row) { + // create meta map def meta = [:] meta.id = row.sample meta.single_end = row.single_end.toBoolean() - def array = [] + // add path(s) of the fastq file(s) to the meta map + def fastq_meta = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" } if (meta.single_end) { - array = [ meta, [ file(row.fastq_1) ] ] + fastq_meta = [ meta, [ file(row.fastq_1) ] ] } else { if (!file(row.fastq_2).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" } - array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] } - return array + return fastq_meta } diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index 1a9a54c5..70d1e53e 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -76,9 +76,9 @@ workflow PREPARE_GENOME { if (params.kraken2_db) { if (params.kraken2_db.endsWith('.tar.gz')) { UNTAR_KRAKEN2_DB ( - params.kraken2_db + [ [:], params.kraken2_db ] ) - ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar + ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) } else { ch_kraken2_db = file(params.kraken2_db) @@ -151,9 +151,9 @@ workflow PREPARE_GENOME { if (params.bowtie2_index) { if (params.bowtie2_index.endsWith('.tar.gz')) { UNTAR_BOWTIE2_INDEX ( - params.bowtie2_index + [ [:], params.bowtie2_index ] ) - ch_bowtie2_index = UNTAR_BOWTIE2_INDEX.out.untar + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) } else { ch_bowtie2_index = file(params.bowtie2_index) @@ -175,9 +175,9 @@ workflow PREPARE_GENOME { if (params.nextclade_dataset) { if (params.nextclade_dataset.endsWith('.tar.gz')) { UNTAR_NEXTCLADE_DB ( - params.nextclade_dataset + [ [:], params.nextclade_dataset ] ) - ch_nextclade_db = UNTAR_NEXTCLADE_DB.out.untar + ch_nextclade_db = UNTAR_NEXTCLADE_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_NEXTCLADE_DB.out.versions) } else { ch_nextclade_db = file(params.nextclade_dataset) @@ -202,9 +202,9 @@ workflow PREPARE_GENOME { if (params.blast_db) { if (params.blast_db.endsWith('.tar.gz')) { UNTAR_BLAST_DB ( 
-                params.blast_db
+                [ [:], params.blast_db ]
             )
-            ch_blast_db = UNTAR_BLAST_DB.out.untar
+            ch_blast_db = UNTAR_BLAST_DB.out.untar.map { it[1] }
             ch_versions = ch_versions.mix(UNTAR_BLAST_DB.out.versions)
         } else {
             ch_blast_db = file(params.blast_db)
diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf
index 6d2449d7..208e582b 100644
--- a/subworkflows/local/prepare_genome_nanopore.nf
+++ b/subworkflows/local/prepare_genome_nanopore.nf
@@ -94,9 +94,9 @@ workflow PREPARE_GENOME {
     if (params.nextclade_dataset) {
         if (params.nextclade_dataset.endsWith('.tar.gz')) {
             UNTAR (
-                params.nextclade_dataset
+                [ [:], params.nextclade_dataset ]
             )
-            ch_nextclade_db = UNTAR.out.untar
+            ch_nextclade_db = UNTAR.out.untar.map { it[1] }
             ch_versions = ch_versions.mix(UNTAR.out.versions)
         } else {
             ch_nextclade_db = file(params.nextclade_dataset)
diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf
index 662c860a..868ff522 100644
--- a/subworkflows/local/variants_bcftools.nf
+++ b/subworkflows/local/variants_bcftools.nf
@@ -56,7 +56,7 @@ workflow VARIANTS_BCFTOOLS {
     // Split multi-allelic positions
     //
     BCFTOOLS_NORM (
-        ch_vcf,
+        ch_vcf.join(ch_tbi, by: [0]),
         fasta
     )
     ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first())
diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf
index 558cc560..fee5c54c 100644
--- a/subworkflows/local/variants_ivar.nf
+++ b/subworkflows/local/variants_ivar.nf
@@ -4,7 +4,8 @@

 include { IVAR_VARIANTS         } from '../../modules/nf-core/modules/ivar/variants/main'
 include { IVAR_VARIANTS_TO_VCF  } from '../../modules/local/ivar_variants_to_vcf'
-include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats'
+include { BCFTOOLS_SORT         } from '../../modules/nf-core/modules/bcftools/sort/main'
+include { VCF_TABIX_STATS       } from '../nf-core/vcf_tabix_stats'
 include { VARIANTS_QC           } from './variants_qc'

 workflow VARIANTS_IVAR {
@@ -47,22 +48,28 @@ workflow VARIANTS_IVAR {
     //
     IVAR_VARIANTS_TO_VCF (
         ch_ivar_tsv,
+        fasta,
         ivar_multiqc_header
     )
     ch_versions = ch_versions.mix(IVAR_VARIANTS_TO_VCF.out.versions.first())

-    VCF_BGZIP_TABIX_STATS (
+    BCFTOOLS_SORT (
         IVAR_VARIANTS_TO_VCF.out.vcf
     )
-    ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions)
+    ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first())
+
+    VCF_TABIX_STATS (
+        BCFTOOLS_SORT.out.vcf
+    )
+    ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions)

     //
     // Run downstream tools for variants QC
     //
     VARIANTS_QC (
         bam,
-        VCF_BGZIP_TABIX_STATS.out.vcf,
-        VCF_BGZIP_TABIX_STATS.out.stats,
+        BCFTOOLS_SORT.out.vcf,
+        VCF_TABIX_STATS.out.stats,
         fasta,
         sizes,
         gff,
@@ -79,9 +86,9 @@ workflow VARIANTS_IVAR {
     log_out      = IVAR_VARIANTS_TO_VCF.out.log    // channel: [ val(meta), [ log ] ]
     multiqc_tsv  = IVAR_VARIANTS_TO_VCF.out.tsv    // channel: [ val(meta), [ tsv ] ]

-    vcf          = VCF_BGZIP_TABIX_STATS.out.vcf   // channel: [ val(meta), [ vcf ] ]
-    tbi          = VCF_BGZIP_TABIX_STATS.out.tbi   // channel: [ val(meta), [ tbi ] ]
-    stats        = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ]
+    vcf          = BCFTOOLS_SORT.out.vcf           // channel: [ val(meta), [ vcf ] ]
+    tbi          = VCF_TABIX_STATS.out.tbi         // channel: [ val(meta), [ tbi ] ]
+    stats        = VCF_TABIX_STATS.out.stats       // channel: [ val(meta), [ txt ] ]

     snpeff_vcf   = VARIANTS_QC.out.snpeff_vcf      // channel: [ val(meta), [ vcf.gz ] ]
     snpeff_tbi   = VARIANTS_QC.out.snpeff_tbi      // channel: [ val(meta), [ tbi ] ]
diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf
index 1ec4764b..af2f676d 100644
--- a/subworkflows/nf-core/align_bowtie2.nf
+++ b/subworkflows/nf-core/align_bowtie2.nf
@@ -10,6 +10,7 @@ workflow ALIGN_BOWTIE2 {
     reads          // channel: [ val(meta), [ reads ] ]
     index          // channel: /path/to/bowtie2/index/
     save_unaligned // value: boolean
+    sort_bam       // value: boolean

     main:

@@ -21,7 +22,8 @@ workflow ALIGN_BOWTIE2 {
     BOWTIE2_ALIGN (
         reads,
         index,
-        save_unaligned
+        save_unaligned,
+        sort_bam
     )
     ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first())

diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf
index 45be0c14..13ba31eb 100644
--- a/subworkflows/nf-core/fastqc_fastp.nf
+++ b/subworkflows/nf-core/fastqc_fastp.nf
@@ -6,6 +6,16 @@ include { FASTQC as FASTQC_RAW  } from '../../modules/nf-core/modules/fastqc/mai
 include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/modules/fastqc/main'
 include { FASTP                 } from '../../modules/nf-core/modules/fastp/main'

+//
+// Function that parses fastp json output file to get total number of reads after trimming
+//
+import groovy.json.JsonSlurper
+
+def getFastpReadsAfterFiltering(json_file) {
+    def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
+    return json['after_filtering']['total_reads'].toInteger()
+}
+
 workflow FASTQC_FASTP {
     take:
     reads // channel: [ val(meta), [ reads ] ]
@@ -49,6 +59,19 @@ workflow FASTQC_FASTP {
         trim_reads_merged = FASTP.out.reads_merged
         ch_versions = ch_versions.mix(FASTP.out.versions.first())

+        //
+        // Filter empty FastQ files after adapter trimming so FastQC doesn't fail
+        //
+        trim_reads
+            .join(trim_json)
+            .map {
+                meta, reads, json ->
+                    if (getFastpReadsAfterFiltering(json) > 0) {
+                        [ meta, reads ]
+                    }
+            }
+            .set { trim_reads }
+
         if (!params.skip_fastqc) {
             FASTQC_TRIM (
                 trim_reads
diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/nf-core/filter_bam_samtools.nf
index cfa8b568..050bf085 100644
--- a/subworkflows/nf-core/filter_bam_samtools.nf
+++ b/subworkflows/nf-core/filter_bam_samtools.nf
@@ -8,7 +8,8 @@ include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools'

 workflow FILTER_BAM_SAMTOOLS {
     take:
-    bam     // channel: [ val(meta), [ bam ] ]
+    bam_bai // channel: [ val(meta), [ bam ], [ bai ] ]
+    fasta   // path   : fasta

     main:

@@ -18,8 +19,8 @@ workflow FILTER_BAM_SAMTOOLS {
     // Filter BAM using Samtools view
     //
     SAMTOOLS_VIEW (
-        bam,
-        []
+        bam_bai,
+        fasta
     )
     ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first())

diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf
index 6df4bad7..67e4f992 100644
--- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf
+++ b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf
@@ -19,12 +19,12 @@ workflow VCF_BGZIP_TABIX_STATS {
     ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first())

     VCF_TABIX_STATS (
-        TABIX_BGZIP.out.gz
+        TABIX_BGZIP.out.output
     )
     ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions)

     emit:
-    vcf      = TABIX_BGZIP.out.gz        // channel: [ val(meta), [ vcf.gz ] ]
+    vcf      = TABIX_BGZIP.out.output    // channel: [ val(meta), [ vcf.gz ] ]
     tbi      = VCF_TABIX_STATS.out.tbi   // channel: [ val(meta), [ tbi ] ]
     stats    = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ]

diff --git a/workflows/illumina.nf b/workflows/illumina.nf
index 7557982c..f4010465 100644
--- a/workflows/illumina.nf
+++ b/workflows/illumina.nf
@@ -39,7 +39,7 @@ if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' :
 ========================================================================================
 */

-ch_multiqc_config        = file("$projectDir/assets/multiqc_config_illumina.yaml", checkIfExists: true)
+ch_multiqc_config        = file("$projectDir/assets/multiqc_config_illumina.yml", checkIfExists: true)
 ch_multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config) : []

 // Header files
@@ -251,17 +251,19 @@ workflow ILLUMINA {
     if (!params.skip_kraken2) {
         KRAKEN2_KRAKEN2 (
             ch_variants_fastq,
-            PREPARE_GENOME.out.kraken2_db
+            PREPARE_GENOME.out.kraken2_db,
+            params.kraken2_variants_host_filter || params.kraken2_assembly_host_filter,
+            params.kraken2_variants_host_filter || params.kraken2_assembly_host_filter
         )
-        ch_kraken2_multiqc = KRAKEN2_KRAKEN2.out.txt
+        ch_kraken2_multiqc = KRAKEN2_KRAKEN2.out.report
         ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first().ifEmpty(null))

         if (params.kraken2_variants_host_filter) {
-            ch_variants_fastq = KRAKEN2_KRAKEN2.out.unclassified
+            ch_variants_fastq = KRAKEN2_KRAKEN2.out.unclassified_reads_fastq
         }

         if (params.kraken2_assembly_host_filter) {
-            ch_assembly_fastq = KRAKEN2_KRAKEN2.out.unclassified
+            ch_assembly_fastq = KRAKEN2_KRAKEN2.out.unclassified_reads_fastq
         }
     }

@@ -276,7 +278,8 @@ workflow ILLUMINA {
         ALIGN_BOWTIE2 (
             ch_variants_fastq,
             PREPARE_GENOME.out.bowtie2_index,
-            params.save_unaligned
+            params.save_unaligned,
+            false
         )
         ch_bam = ALIGN_BOWTIE2.out.bam
         ch_bai = ALIGN_BOWTIE2.out.bai
@@ -358,7 +361,8 @@ workflow ILLUMINA {
     if (!params.skip_variants && !params.skip_picard_metrics) {
         PICARD_COLLECTMULTIPLEMETRICS (
             ch_bam,
-            PREPARE_GENOME.out.fasta
+            PREPARE_GENOME.out.fasta,
+            []
         )
         ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first().ifEmpty(null))
     }
@@ -372,7 +376,7 @@ workflow ILLUMINA {
         MOSDEPTH_GENOME (
             ch_bam.join(ch_bai, by: [0]),
             [],
-            200
+            []
         )
         ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt
         ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first().ifEmpty(null))
@@ -386,7 +390,7 @@ workflow ILLUMINA {
         MOSDEPTH_AMPLICON (
             ch_bam.join(ch_bai, by: [0]),
             PREPARE_GENOME.out.primer_collapsed_bed,
-            0
+            []
         )
         ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first().ifEmpty(null))

@@ -411,8 +415,8 @@ workflow ILLUMINA {
         VARIANTS_IVAR (
             ch_bam,
             PREPARE_GENOME.out.fasta,
-            PREPARE_GENOME.out.fai,
-            PREPARE_GENOME.out.chrom_sizes,
+            (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.fai : [],
+            (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.chrom_sizes : [],
             PREPARE_GENOME.out.gff,
             (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [],
             PREPARE_GENOME.out.snpeff_db,
@@ -435,7 +439,7 @@ workflow ILLUMINA {
         VARIANTS_BCFTOOLS (
             ch_bam,
             PREPARE_GENOME.out.fasta,
-            PREPARE_GENOME.out.chrom_sizes,
+            (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.chrom_sizes : [],
             PREPARE_GENOME.out.gff,
             (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [],
             PREPARE_GENOME.out.snpeff_db,
diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf
index da7ebf24..9bb63231 100644
--- a/workflows/nanopore.nf
+++ b/workflows/nanopore.nf
@@ -38,7 +38,7 @@ if (params.artic_minion_caller == 'medaka') {
 ========================================================================================
 */

-ch_multiqc_config        = file("$projectDir/assets/multiqc_config_nanopore.yaml", checkIfExists: true)
+ch_multiqc_config        = file("$projectDir/assets/multiqc_config_nanopore.yml", checkIfExists: true)
 ch_multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config) : []

 /*
@@ -308,7 +308,7 @@ workflow NANOPORE {
         PREPARE_GENOME.out.fasta,
         PREPARE_GENOME.out.primer_bed,
         ch_medaka_model.collect().ifEmpty([]),
-        params.artic_minion_medaka_model,
+        params.artic_minion_medaka_model ?: '',
         params.artic_scheme,
         params.primer_set_version
     )
@@ -342,7 +342,8 @@ workflow NANOPORE {
     // SUBWORKFLOW: Filter unmapped reads from BAM
     //
     FILTER_BAM_SAMTOOLS (
-        ARTIC_MINION.out.bam
+        ARTIC_MINION.out.bam.join(ARTIC_MINION.out.bai, by: [0]),
+        []
     )
     ch_versions = ch_versions.mix(FILTER_BAM_SAMTOOLS.out.versions)

@@ -356,7 +357,7 @@ workflow NANOPORE {
         MOSDEPTH_GENOME (
             ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]),
             [],
-            200
+            []
         )
         ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt
         ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first().ifEmpty(null))
@@ -369,7 +370,7 @@ workflow NANOPORE {
         MOSDEPTH_AMPLICON (
             ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]),
             PREPARE_GENOME.out.primer_collapsed_bed,
-            0
+            []
         )
         ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first().ifEmpty(null))