diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..b290e09 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index b6b3190..dd9ffa5 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py,md}] +indent_style = unset diff --git a/.gitattributes b/.gitattributes index 050bb12..7a2dabc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 952c58d..18c0022 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! 
Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/variantmtb then the best place to ask is on the nf-core Slack [#variantmtb](https://nfcore.slack.com/channels/variantmtb) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. 
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes @@ -101,3 +108,18 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/variantmtb/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 5e96c6a..2111607 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/variantmtb _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1a986c8..fd747a2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,10 +15,11 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/vari - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/variantmtb/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/variantmtb _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/variantmtb/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/variantmtb _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 049e2d5..a1a77c2 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,17 +14,26 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 - # TODO nf-core: You can customise AWS full pipeline tests as required + uses: seqeralabs/action-tower-launch@v2 + # nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/variantmtb/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/variantmtb/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + + - uses: actions/upload-artifact@v4 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 3a9971e..6fa6118 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,14 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/variantmtb/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/variantmtb/results-test-${{ github.sha }}" } - profiles: 
test,aws_tower + profiles: test + + - uses: actions/upload-artifact@v4 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index e9e3f3a..cf06260 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,13 +13,13 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/variantmtb' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/variantmtb ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/variantmtb ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 050329d..e9d183a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,10 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true jobs: test: @@ -20,30 +23,23 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "23.04.0" + - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow - env: - 
NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required + # nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 0000000..0b6b1f2 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
+ days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..08622fd --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,72 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ 
env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index a20999f..dd1918d 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 - - name: Install Prettier - run: npm install -g 
prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" - else - echo "::set-output name=result::fail" - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . 
git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/variantmtb/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358de..1fcafe8 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,53 +4,41 @@ name: nf-core linting # that the code meets the nf-core guidelines. 
on: push: + branches: + - dev pull_request: release: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - - uses: actions/setup-node@v2 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-node@v2 + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" - - name: Install Prettier - run: npm install -g prettier + - name: Install pre-commit + run: pip install pre-commit - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 with: - python-version: "3.6" + python-version: "3.12" architecture: "x64" - name: Install dependencies @@ -71,7 +59,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f6..40acc23 100644 --- 
a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,17 +11,17 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 with: workflow: linting.yml workflow_conclusion: completed - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 0000000..d468aea --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitignore b/.gitignore index 5124c9a..5027711 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ results/ testing/ testing* *.pyc +dev_test +workflows/test_module +*.log* diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ec..105a182 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,20 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..764c6de 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,7 @@ repository_type: pipeline +nf_core_version: "2.14.1" +lint: + files_unchanged: + - assets/nf-core-variantmtb_logo_light.png + - docs/images/nf-core-variantmtb_logo_light.png + - docs/images/nf-core-variantmtb_logo_dark.png diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..af57081 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/.prettierignore b/.prettierignore index d0e7ae5..437d763 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,6 @@ email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -7,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 9862cd4..29ba74f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,63 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.0](https://github.com/qbic-pipelines/variantmtb/releases/tag/1.0.0) - Piz Bernina + +### Added + +- filter civic evidences according to patient cancer type +- filter civic evidences through custom filters +- profile `actionability` containing common civic evidence filters for actionability queries + +### Fixed + +- [#10](https://github.com/qbic-pipelines/variantmtb/issues/10) CGI used to fail when querying files in parallel. Disabled parallelization for CGI. +- important querynator fixes, see [querynator:0.5.5 release notes](https://github.com/qbic-pipelines/querynator/releases/tag/0.5.5) + +### Dependencies + +- updated querynator to 0.6.0 + +### Deprecated + +### Removed + +- `--cgi_cancer_type` is no longer supported. specify cancer type in sample sheet instead using fields `cgi_cancer` and `civic_cancer` + +## [0.2.0](https://github.com/qbic-pipelines/variantmtb/releases/tag/0.2.0) - Wendelstein + +### Added + +- nextflow secrets for CGI credentials. 
+ +### Fixed + +### Dependencies + +- bcftools version 1.18. + +### Deprecated + +## [0.1.0](https://github.com/qbic-pipelines/variantmtb/releases/tag/0.1.0) - Paris-Roubaix + +### Added + +- [#1](https://github.com/qbic-pipelines/variantmtb/pull/1) - Query to CGI & CIViC. Creation of a comprehensive HTML report. + +### Fixed + +### Dependencies + +### Deprecated + ## v1.0dev - [date] Initial release of nf-core/variantmtb, created with the [nf-core](https://nf-co.re/) template. -### `Added` +### Added -### `Fixed` +### Fixed -### `Dependencies` +### Dependencies -### `Deprecated` +### Deprecated diff --git a/CITATIONS.md b/CITATIONS.md index 736b6f5..bc0365d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,10 +10,15 @@ ## Pipeline tools -- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +- [Tabix](http://www.htslib.org/doc/tabix.html) +- [bcftools norm](https://samtools.github.io/bcftools/bcftools.html#norm) -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [CGI](https://www.cancergenomeinterpreter.org/home) + > Tamborero, D., Rubio-Perez, C., Deu-Pons, J., Schroeder, M. P., Vivancos, A., Rovira, A., ... & Lopez-Bigas, N. (2018). Cancer Genome Interpreter annotates the biological and clinical relevance of tumor alterations. Genome medicine, 10, 1-8. +- [CIViC](https://civicdb.org/welcome) + > Griffith, M., Spies, N. C., Krysiak, K., McMichael, J. F., Coffman, A. C., Danos, A. M., ... & Griffith, O. L. (2017). CIViC is a community knowledgebase for expert crowdsourcing the clinical interpretation of variants in cancer. Nature genetics, 49(2), 170-174. +- [CIViCpy](https://docs.civicpy.org/en/latest/) + > Wagner, A. H., Kiwala, S., Coffman, A. 
C., McMichael, J. F., Cotto, K. C., Mooney, T. B., ... & Griffith, M. (2020). CIViCpy: a python software development and analysis toolkit for the CIViC knowledgebase. JCO Clinical Cancer Informatics, 4, 245-253. ## Software packaging/containerisation tools @@ -31,5 +36,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been 
drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. 
+We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. 
-The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. 
- Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. 
If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. 
Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. 
This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable.
+- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. 
The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind them what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text.
+ +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index b63e068..53fbf43 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) SusiJo +Copyright (c) SusiJo, mapo9, HomoPolyethylen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index dbe5b95..a32bbb9 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,47 @@ -# ![nf-core/variantmtb](docs/images/nf-core-variantmtb_logo_light.png#gh-light-mode-only) ![nf-core/variantmtb](docs/images/nf-core-variantmtb_logo_dark.png#gh-dark-mode-only) +

+ + + nf-core/variantmtb + +

-[![GitHub Actions CI Status](https://github.com/nf-core/variantmtb/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/variantmtb/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/variantmtb/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/variantmtb/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/variantmtb/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions CI Status](https://github.com/qbic-pipelines/variantmtb/actions/workflows/ci.yml/badge.svg)](https://github.com/qbic-pipelines/variantmtb/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/qbic-pipelines/variantmtb/actions/workflows/linting.yml/badge.svg)](https://github.com/qbic-pipelines/variantmtb/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/variantmtb/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) -[![Launch on Nextflow 
Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/variantmtb) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/variantmtb) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23variantmtb-4A154B?logo=slack)](https://nfcore.slack.com/channels/variantmtb) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23variantmtb-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/variantmtb)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction - + -**nf-core/variantmtb** is a bioinformatics best-practice analysis pipeline for This pipeline queries variant databases to investigate 
the biological and predictive relevance of tumor variants. +**qbic-pipelines/variantmtb** is a bioinformatics best-practice analysis pipeline for querying variant databases to investigate the diagnostic, prognostic and predictive relevance of tumor variants. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! - + -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/variantmtb/results). + + +

+ +

## Pipeline summary - + -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Normalize variants [bcftools norm](https://www.htslib.org/doc/1.0/bcftools.html#norm) +2. Index VCF file [tabix](http://www.htslib.org/doc/tabix.html) +3. Query [CGI](https://www.cancergenomeinterpreter.org/home) +4. Query [CIViC](https://civicdb.org/variants/home) +5. Categorize variants and create a comprehensive HTML report ## Quick Start @@ -55,10 +64,10 @@ On release, automated continuous integration tests run the pipeline on a full-si 4. Start running your own analysis! - + ```console - nextflow run nf-core/variantmtb --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> + nextflow run qbic-pipelines/variantmtb -r dev --input samplesheet.csv --outdir <OUTDIR> --genome GRCh38 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> ``` ## Documentation @@ -67,11 +76,10 @@ The nf-core/variantmtb pipeline comes with documentation about the pipeline [usa ## Credits -nf-core/variantmtb was originally written by SusiJo. - -We thank the following people for their extensive assistance in the development of this pipeline: +nf-core/variantmtb was originally started by SusiJo and mainly developed by mapo9, with further contributions by HomoPolyethylen. - + + ## Contributions and Support @@ -82,9 +90,9 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - + - + An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..5b57169 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/variantmtb v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index 22c4e26..ddf3264 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,7 +4,7 @@ - + nf-core/variantmtb Pipeline Report @@ -12,7 +12,7 @@ -

nf-core/variantmtb v${version}

+

nf-core/variantmtb ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 52345ec..36debfe 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,9 +4,8 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/variantmtb v${version} + nf-core/variantmtb ${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 0000000..8807cbc --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,29 @@ +id: "nf-core-variantmtb-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/variantmtb Methods Description" +section_href: "https://github.com/nf-core/variantmtb" +plot_type: "html" +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/variantmtb v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

${tool_citations}

+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x
  • +
  • Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7
  • +
  • da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192
  • + ${tool_bibliography} +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index cd22504..058c7d3 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,11 +1,15 @@ report_comment: > - This report has been generated by the nf-core/variantmtb + This report has been generated by the nf-core/variantmtb analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: - software_versions: + "nf-core-variantmtb-methods-description": order: -1000 - "nf-core-variantmtb-summary": + software_versions: order: -1001 + "nf-core-variantmtb-summary": + order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/nf-core-variantmtb_logo_light.png b/assets/nf-core-variantmtb_logo_light.png index dad0587..a8b58b9 100644 Binary files a/assets/nf-core-variantmtb_logo_light.png and b/assets/nf-core-variantmtb_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab..ffa8d14 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,4 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,filename,genome,filetype +sample_1,path/to/file_1.vcf,GRCh38,mutations +sample_2,path/to/file_2.vcf,GRCh38,mutations +sample_3,path/to/file_3.vcf,GRCh38,mutations diff --git a/assets/schema_input.json b/assets/schema_input.json index 7c30d56..74bc573 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,27 +10,39 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, - "fastq_1": { + "filename": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - 
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(vcf|vcf.gz|tsv)$", + "errorMessage": "filename must be an existing VCF (for SNPs) or TSV (for CNAs) file" }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "genome": { + "type": "string", + "enum": ["hg19", "GRCh37", "hg38", "GRCh38"], + "errorMessage": "genome must be one of 'hg19', 'GRCh37', 'hg38' or 'GRCh38'", + "meta": ["ref"] + }, + "filetype": { + "type": "string", + "pattern": "(^mutations$)|(^cnas$)|(^translocations$)", + "errorMessage": "the filetype must be one of 'mutations', 'cnas' or 'translocations'", + "meta": ["filetype"] + }, + "cgi_cancer": { + "type": "string", + "errorMessage": "specify a valid OncoTree cancer name", + "meta": ["cgi_cancer"] + }, + "civic_cancer": { + "type": "string", + "errorMessage": "specify a valid Disease Ontology (DO) name or id", + "meta": ["civic_cancer"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "filename", "filetype"] } } diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..ad638c1 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/variantmtb ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": 
false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 3652c63..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,260 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). 
- second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - assert len(row[self._sample_col]) > 0, "Sample input is required." - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. 
Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] - ), "FASTQ pairs must have the same file extensions." - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename the sample if more than one sample, - FASTQ file combination exists. - - """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." - if len({pair[0] for pair in self._seen}) < len(self._seen): - counts = Counter(pair[0] for pair in self._seen) - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - if counts[sample] > 1: - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. 
_text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") - sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") - sys.exit(1) - # Validate each row. 
- checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/actionability_filters.config b/conf/actionability_filters.config new file mode 100644 index 0000000..907cebf --- /dev/null +++ 
b/conf/actionability_filters.config @@ -0,0 +1,13 @@ +/* +This config file holds typical filters for the CIViC evidence when querying for actionability. +This file can also serve as a template for your custom filters. +*/ +process { + withName: 'QUERYNATOR_CIVICAPI' { + ext.args = {'--filter_evidence type=predictive \ + --filter_evidence significance=SensitivityResponse \ + --filter_evidence direction=supports \ + --filter_evidence status=accepted \ + --filter_evidence level=D'} + } +} diff --git a/conf/base.config b/conf/base.config index 7ff029c..73b9e63 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,12 +10,11 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -24,8 +23,12 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/igenomes.config b/conf/igenomes.config index 7a1b3ac..3f11437 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -36,6 +36,14 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" diff --git a/conf/modules.config b/conf/modules.config index da58a5d..abd9f89 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,23 +18,55 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SAMPLESHEET_CHECK { + withName: 'BCFTOOLS_NORM' { + ext.args = "--output-type z -a --atom-overlaps ." 
+ ext.prefix = { "${meta.id}.normalized" } + publishDir = [ + enabled: false + ] + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: '*_versions.yml' ] } - withName: FASTQC { - ext.args = '--quiet' + withName: QUERYNATOR_CGIAPI { + publishDir = [ + path: { "${params.outdir}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*' + ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { + withName: QUERYNATOR_CIVICAPI { publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + path: { "${params.outdir}/${meta.id}" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + pattern: '*' + ] + } + + withName: QUERYNATOR_CREATEREPORT { + publishDir = [ + path: { "${params.outdir}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*' + ] + } + + withName: TABIX_TABIX { + publishDir = [ + enabled: false + ] + } + + withName: TABIX_BGZIPTABIX { + publishDir = [ + enabled: false ] } diff --git a/conf/test.config b/conf/test.config index 59da7e2..a8e3457 100644 --- a/conf/test.config +++ b/conf/test.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_description = 'Minimal test dataset to check pipeline function, single KB, SNPs only' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -20,10 +20,12 @@ params { max_time = '6.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = "${projectDir}/tests/csv/input.csv" // Genome references - genome = 'R64-1-1' + genome = 'GRCh37' + 
fasta = "s3://ngi-igenomes/igenomes/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + + // mandatory flags + databases = 'civic' } diff --git a/conf/test_full.config b/conf/test_full.config index c1dabea..ece9ea1 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,24 +1,35 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running full-size tests + Nextflow config file for running minimal tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a full size pipeline test. + Defines input files and everything required to run a fast and simple pipeline test. Use as follows: - nextflow run nf-core/variantmtb -profile test_full, --outdir + nextflow run nf-core/variantmtb -profile test, --outdir ---------------------------------------------------------------------------------------- */ params { config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_description = 'TEST: Query all supported knowledgebases and input types using a reference genome identifier and specific cancer type. Some KBs might need access credentials.' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/tests/csv/input_cancer.csv" // Genome references - genome = 'R64-1-1' + genome = 'GRCh37' + + // mandatory flags + databases = 'civic,cgi' + + // optional flags } + +// test civic evidence filters +includeConfig 'actionability_filters.config' diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e4..0000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb..0000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf..0000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/nf-core-variantmtb_logo_dark.png b/docs/images/nf-core-variantmtb_logo_dark.png index e9d1968..d03c76c 100644 Binary files a/docs/images/nf-core-variantmtb_logo_dark.png and b/docs/images/nf-core-variantmtb_logo_dark.png differ diff --git a/docs/images/nf-core-variantmtb_logo_light.png b/docs/images/nf-core-variantmtb_logo_light.png index 72bd82c..30fc530 100644 Binary files a/docs/images/nf-core-variantmtb_logo_light.png and b/docs/images/nf-core-variantmtb_logo_light.png differ diff --git a/docs/images/variantMTB_workflow.png b/docs/images/variantMTB_workflow.png new file mode 100644 index 0000000..0a2e65f Binary files /dev/null and b/docs/images/variantMTB_workflow.png differ diff --git a/docs/output.md 
b/docs/output.md index f970c98..4ad9568 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,63 +6,29 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - + ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [nf-core/variantmtb: Output](#nf-corevariantmtb-output) + - [Introduction](#introduction) + - [Pipeline overview](#pipeline-overview) + - [CGI](#cgi) + - [CIViC](#civic) + - [Report](#report) -### FastQC +### CGI -
-Output files +CGI is queried by accessing its RESTful API. It takes in a list of variants and returns several output files. +See the [querynator docs](https://querynator.readthedocs.io/en/latest/usage.html#query-the-cancergenomeinterpeter-cgi) for more information -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +### CIViC -
+CIViC is queried using the CIViCpy tool. It takes in single variants from a VCF and annotates them. +See the [querynator docs](https://querynator.readthedocs.io/en/latest/usage.html#query-the-clinical-interpretations-of-variants-in-cancer-civic) for more information -[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +### Report -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) - -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) - -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) - -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. - -### MultiQC - -
-Output files - -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. - -
- -[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. - -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . - -### Pipeline information - -
-Output files - -- `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - -
- -[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. +The results of the CIViC and CGI queries are combined. The variants are then categorized based on the guidelines proposed by the [AMP](https://www.sciencedirect.com/science/article/pii/S1525157816302239). diff --git a/docs/usage.md b/docs/usage.md index d61d16f..8e76075 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,49 +6,35 @@ ## Introduction - + ## Samplesheet input You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. -```console +```bash --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. 
- -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +A final samplesheet may look something like the one below. ```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,filename,genome,filetype,cgi_cancer,civic_cancer +sample_1,file_1.vcf,GRCh38,mutations,Breast adenocarcinoma,breast adenocarcinoma +sample_2,file_2.vcf,GRCh38,mutations,Cholangiocarcinoma,DOID:4947 +sample_3,file_3.vcf,GRCh38,mutations,Melanoma,1909 ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| -------------- | --------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. | +| `filename` | Full path to VCF file. 
File can be gzipped and have the extension ".vcf.gz", ".vcf" or ".tsv". | +| `genome` | The reference genome used in variant calling of this file. | +| `filetype` | Either "mutations" for variant files, "cnas" for copy number alterations files or "translocations" for translocation files. | +| `cgi_cancer` | The sample's [OncoTree](http://oncotree.info/) cancer type. Used by CGI. | +| `civic_cancer` | The sample's [Disease Ontology](https://disease-ontology.org/do) cancer type. Used by CIViC. Specify name or ID. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -57,25 +43,68 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```console -nextflow run nf-core/variantmtb --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run qbic-pipelines/variantmtb -r 1.0.0 \ + --input samplesheet.csv \ + --outdir \ + --genome \ + --fasta path/to/reference/file \ + --databases 'cgi, civic' \ + -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. -Note that the pipeline will create the following files in your working directory: +### Using CGI + +If you wish to query the [Cancer Genome Interpreter (CGI)](https://www.cancergenomeinterpreter.org/home), you will need to create a free account and provide your credentials to the pipeline via [Nextflow secrets](https://www.nextflow.io/docs/latest/secrets.html). This step should only be required before the first run. 
```console +export NXF_ENABLE_SECRETS=true +nextflow secrets set cgi_email my-account@whatever.com +nextflow secrets set cgi_token f08ffMYTOKEN97fr3 +``` + +All possible CGI cancer types can be found [here](https://github.com/qbic-pipelines/querynator/blob/master/querynator/query_api/cancertypes.js) + +Note that the pipeline will create the following files in your working directory: + +```bash work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) + # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/variantmtb -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/variantmtb ``` @@ -83,29 +112,42 @@ nextflow pull nf-core/variantmtb It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/variantmtb releases page](https://github.com/nf-core/variantmtb/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +First, go to the [nf-core/variantmtb releases page](https://github.com/nf-core/variantmtb/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. +:::tip +If you wish to share such a profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. 
+::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/). +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). 
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters - `docker` - A generic configuration profile to be used with [Docker](https://docker.com/) - `singularity` @@ -116,11 +158,18 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) - `charliecloud` - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
+- `actionability` + - holds common filters for civic evidences when only interested in actionability evidences + - contains the following filters for civic evidence, set via the `--filter_evidence` parameter: + - `type=predictive` + - `significance=SensitivityResponse` + - `direction=supports` + - `status=accepted` + - `level=D` ### `-resume` @@ -138,96 +187,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: - -```console -[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' - -Caused by: - Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) - -Command executed: - STAR \ - --genomeDir star \ - --readFilesIn WT_REP1_trimmed.fq.gz \ - --runThreadN 2 \ - --outFileNamePrefix WT_REP1. 
\ - - -Command exit status: - 137 - -Command output: - (empty) - -Command error: - .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. -Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. 
-Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. 
However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) -2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) -3. Create the custom config accordingly: +### Custom Containers - - For Docker: +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline specified version may be out of date. - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - - For Singularity: +### Custom Tool Arguments - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. - - For Conda: - - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` - -> **NB:** If you wish to periodically update individual tool-specific results (e.g.
Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. ### nf-core/configs @@ -237,6 +209,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. @@ -251,6 +231,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f..0000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 
'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' 
- JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = group_params.get(param).default != null ? 
" [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = 
workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = 
paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } 
- // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... - // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - 
ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 2fc0a9b..0000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,258 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. 
`-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = 
workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send 
plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? 
'' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 28567bd..0000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,40 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline 
-// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } - - // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) - - if (conda_check_failed) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index b87e9ed..0000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,94 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/variantmtb pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" 
https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (params.enable_conda) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, 
params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } - - // - // Get attribute from genome config file e.g. fasta - // - public static String getGenomeAttribute(params, attribute) { - def val = '' - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] - } - } - return val - } -} diff --git a/lib/WorkflowVariantmtb.groovy b/lib/WorkflowVariantmtb.groovy deleted file mode 100755 index 46b2327..0000000 --- a/lib/WorkflowVariantmtb.groovy +++ /dev/null @@ -1,59 +0,0 @@ -// -// This file holds several functions specific to the workflow/variantmtb.nf in the nf-core/variantmtb pipeline -// - -class WorkflowVariantmtb { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExistsError(params, log) - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) - } - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 2dfa557..8803c8e 100644 --- a/main.nf +++ b/main.nf @@ -13,47 +13,91 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +include { VARIANTMTB } from './workflows/variantmtb' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_variantmtb_pipeline' +include { PIPELINE_COMPLETION } from 
'./subworkflows/local/utils_nfcore_variantmtb_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { VARIANTMTB } from './workflows/variantmtb' - // -// WORKFLOW: Run main nf-core/variantmtb analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_VARIANTMTB { - VARIANTMTB () -} + take: + samplesheet // channel: samplesheet read in from --input + versions // channel: aggregates versions of software used in pipeline + + main: + + // + // WORKFLOW: Run pipeline + // + VARIANTMTB ( + samplesheet, + versions + ) + +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_VARIANTMTB () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_VARIANTMTB ( + PIPELINE_INITIALISATION.out.samplesheet, + PIPELINE_INITIALISATION.out.versions + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + 
params.monochrome_logs, + params.hook_url + ) } /* diff --git a/modules.json b/modules.json index 3b5cde5..0d9eb08 100644 --- a/modules.json +++ b/modules.json @@ -2,15 +2,59 @@ "name": "nf-core/variantmtb", "homePage": "https://github.com/nf-core/variantmtb", "repos": { - "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bcftools/norm": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "bcftools/view": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "f448e846bdadd80fc8be31fbbc78d9f5b5131a45", + "installed_by": ["modules"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + } + } }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "f533459a222ac53eb4c6bb7a5f574e4069197cdb", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/bcftools/splitvep.nf 
b/modules/local/bcftools/splitvep.nf new file mode 100644 index 0000000..ee2c6a7 --- /dev/null +++ b/modules/local/bcftools/splitvep.nf @@ -0,0 +1,43 @@ +process BCFTOOLS_SPLITVEP { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + + input: + tuple val(meta), path(vcf), path(index) + + output: + + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + bcftools \\ + +split-vep \\ + $args \\ + --output ${prefix}.tsv \\ + $vcf + + # insert header + sed -i '1i #CHROM POS ID REF ALT AF IMPACT Gene SYMBOL Consequence SIFT PolyPhen HGVSc HGVSp RefSeq Existing_variation CLIN_SIG' ${prefix}.tsv + + # replace whitespace by tab + sed -i 's/\s/\t/g' ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/querynator/cgiapi.nf b/modules/local/querynator/cgiapi.nf new file mode 100644 index 0000000..7425f84 --- /dev/null +++ b/modules/local/querynator/cgiapi.nf @@ -0,0 +1,63 @@ +process QUERYNATOR_CGIAPI { + tag "$meta.id" + label 'process_low' + secret 'cgi_email' + secret 'cgi_token' + maxForks 1 + + conda "bioconda::querynator=0.6.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/querynator:0.6.0--pyhdfd78af_0': + 'quay.io/biocontainers/querynator:0.6.0--pyhdfd78af_0' }" + + + input: + + tuple val(meta), path(mutations), path(translocations), path(cnas), val(cancer), val(genome) + + output: + + tuple val(meta), path("${meta.id}_cgi") , emit: result_dir + tuple val(meta), path("${meta.id}_cgi/${meta.id}_cgi.cgi_results.zip") , emit: zip + tuple val(meta), path("${meta.id}_cgi/${meta.id}_cgi.cgi_results") , emit: cgi_results + tuple val(meta), path("${meta.id}_cgi/${meta.id}_cgi.cgi_results/*") , emit: results + tuple val(meta), path("${meta.id}_cgi/vcf_files") , emit: input_vcf_dir + tuple val(meta), path("${meta.id}_cgi/vcf_files/${meta.id}_cgi.filtered_variants.vcf") , emit: input_vcf_filtered + tuple val(meta), path("${meta.id}_cgi/vcf_files/${meta.id}_cgi.removed_variants.vcf") , emit: input_vcf_removed + + + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mutations_file = mutations ? "--mutations ${mutations}" : "" + def translocation_file = translocations ? "--translocations ${translocations}" : '' + def cnas_file = cnas ? "--cnas ${cnas}" : '' + def cancer = cancer ? 
cancer : 'Any cancer type' // default to any cancer type if not specified + + """ + export MPLCONFIGDIR=${workDir}/.config/matplotlib + + querynator query-api-cgi \\ + $mutations_file \\ + $translocation_file \\ + $cnas_file \\ + --outdir ${prefix}_cgi \\ + --cancer '$cancer' \\ + --genome $genome \\ + --token \${cgi_token} \\ + --email \${cgi_email} \\ + --filter_vep \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + querynator: \$(echo \$(querynator --version 2>&1) | sed 's/^.*querynator //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/querynator/civicapi.nf b/modules/local/querynator/civicapi.nf new file mode 100644 index 0000000..3d5c2b7 --- /dev/null +++ b/modules/local/querynator/civicapi.nf @@ -0,0 +1,55 @@ +process QUERYNATOR_CIVICAPI { + tag "$meta.id" + label 'process_low' + + conda "bioconda::querynator=0.6.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/querynator:0.6.0--pyhdfd78af_0': + 'quay.io/biocontainers/querynator:0.6.0--pyhdfd78af_0' }" + + + input: + + tuple val(meta), path(input_file) + + output: + + tuple val(meta), path("${meta.id}_civic") , emit: result_dir + tuple val(meta), path("${meta.id}_civic/${meta.id}_civic.civic_results.tsv") , emit: civic_table + tuple val(meta), path("${meta.id}_civic/vcf_files") , emit: input_vcf_dir + tuple val(meta), path("${meta.id}_civic/vcf_files/${meta.id}_civic.filtered_variants.vcf") , emit: input_vcf_filtered + tuple val(meta), path("${meta.id}_civic/vcf_files/${meta.id}_civic.removed_variants.vcf") , emit: input_vcf_removed + tuple val(meta), path("${meta.id}_civic/metadata.txt") , emit: metadata + + + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def cancer = meta.civic_cancer ? 
"--cancer '${meta.civic_cancer}'" : '' + + """ + # set path to civicpy cache + export CIVICPY_CACHE_FILE=${workDir}/.civicpy/cache.pkl + export MPLCONFIGDIR=${workDir}/.config/matplotlib + + # run querynator + querynator query-api-civic \\ + --vcf ${input_file} \\ + --outdir ${prefix}_civic \\ + --genome ${meta.ref} \\ + --filter_vep \\ + ${cancer} \\ + ${args} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + querynator: \$(echo \$(querynator --version 2>&1) | sed 's/^.*querynator //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/querynator/createreport.nf b/modules/local/querynator/createreport.nf new file mode 100644 index 0000000..d5e2ca5 --- /dev/null +++ b/modules/local/querynator/createreport.nf @@ -0,0 +1,45 @@ +process QUERYNATOR_CREATEREPORT { + tag "$meta.id" + label 'process_low' + + conda "bioconda::querynator=0.6.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/querynator:0.6.0--pyhdfd78af_0': + 'quay.io/biocontainers/querynator:0.6.0--pyhdfd78af_0' }" + + + input: + + tuple val(meta), path(cgi_out), path(civic_out) + + output: + + tuple val(meta), path("${meta.id}_report") , emit: report_dir + tuple val(meta), path("${meta.id}_report/combined_files") , emit: combined_files_dir + tuple val(meta), path("${meta.id}_report/combined_files/*") , emit: combined_files + tuple val(meta), path("${meta.id}_report/report") , emit: report_html_dir + tuple val(meta), path("${meta.id}_report/report/*") , emit: report_files + + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + querynator create-report \\ + --cgi_path $cgi_out \\ + --civic_path $civic_out \\ + --outdir ${prefix}_report \\ + $args + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + querynator: \$(echo 
\$(querynator --version 2>&1) | sed 's/^.*querynator //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index e3821ff..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,27 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - script: // This script is bundled with the pipeline, in nf-core/variantmtb/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 0000000..5c00b11 --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 0000000..bd7a250 --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,70 @@ +process BCFTOOLS_NORM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 0000000..b6edeb4 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,85 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 0000000..dbc4150 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,563 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + 
process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ 
+ [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 0000000..3be5211 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,758 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:42.639095032" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:05.448449893" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:12.741719961" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ + 
[ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:39:22.875147941" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T08:15:23.38765384" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:36:21.519977754" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:27.8230994" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,f35545c26a788b5eb697d9c0490339d9" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:53.942403192" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:56:05.3799488" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:36:58.39445154" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:16.259516142" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:39:10.503208929" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:38:59.121377258" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:56:16.404380471" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions": [ + "versions.yml:md5,ff760495922469e56d0fc3372773000d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:37:42.141945244" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 0000000..b79af86 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 0000000..f36f397 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 0000000..510803b --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 0000000..10bf93e --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 @@ 
+process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 0000000..b31dd2d --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/tags.yml b/modules/nf-core/bcftools/norm/tests/tags.yml new file mode 100644 index 0000000..f6f5e35 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/norm: + - "modules/nf-core/bcftools/norm/**" diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config new file mode 100644 index 0000000..7dd696e --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config new file mode 100644 index 0000000..aebffb6 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config new file mode 100644 index 0000000..b192ae7 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new
file mode 100644 index 0000000..5c00b11 --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf new file mode 100644 index 0000000..7fe4303 --- /dev/null +++ b/modules/nf-core/bcftools/view/main.nf @@ -0,0 +1,76 @@ +process BCFTOOLS_VIEW { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(index) + path(regions) + path(targets) + path(samples) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + """ + bcftools view \\ + --output ${prefix}.${extension} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 0000000..aa7785f --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,88 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. 
Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + - - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 'file.vcf' + - - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + - - samples: + type: file + description: | + Optionally, a file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF or BCF output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test new file mode 100644 index 0000000..1e60c50 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -0,0 +1,298 @@ +nextflow_process { + + name "Test Process BCFTOOLS_VIEW" + script "../main.nf" + process "BCFTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/view" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap new file mode 100644 index 0000000..fec22e3 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -0,0 +1,333 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - 
vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:14:38.717458272" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:13:44.760671384" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + 
"out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T16:06:21.669668533" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:14:53.026083914" + }, + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:14.663512924" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.vcf", + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:19.723448323" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": 
false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T08:24:36.358469315" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:15:09.588867653" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,241125d00357804552689c37bbabe1f5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T12:13:33.834986869" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config new file mode 100644 index 0000000..932e3ba --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/bcftools/view/tests/tags.yml b/modules/nf-core/bcftools/view/tests/tags.yml new file mode 100644 index 0000000..43b1f0a --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/view: + - "modules/nf-core/bcftools/view/**" diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config new file mode 100644 index 0000000..7dd696e --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + 
ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config new file mode 100644 index 0000000..aebffb6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config new file mode 100644 index 0000000..b192ae7 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9d79af9 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 71% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d510..105f926 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 
"bioconda::multiqc=1.11" : null) + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 0000000..dc1e412 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline + template +keywords: + - custom + - dump + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline + template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] + identifier: "" +input: + - - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" +output: + - yml: + - software_versions.yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + - software_versions_mqc.yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py 
b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100755 index 0000000..b83b32c --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python + + +"""Provide functions to merge multiple versions.yml files.""" + +import platform +from textwrap import dedent + +import yaml + + +def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" + html = [ + dedent( + """\\ + + + + + + + + + + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + + + + + + """ + ) + ) + html.append("") + html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ 
b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..c779485 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 0000000..5e67e3b --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = 
task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 0000000..9066c03 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..776211a --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + 
[], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..069967e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], 
+ "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config 
b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 0000000..dec7764 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 60b546a..0000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100644 index d139039..0000000 --- 
a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - -
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow
- - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
Process Name Software Version
{process if (i == 0) else ''}{tool}{version}
") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index ed6b8c5..0000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,47 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 
"bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/modules/fastqc/meta.yml deleted file mode 100644 index 4da5bb5..0000000 --- a/modules/nf-core/modules/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. 
- homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1264aac..0000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml deleted file mode 100644 index 6fa891e..0000000 --- a/modules/nf-core/modules/multiqc/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] -input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC -output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 0000000..017c259 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf new file mode 100644 index 0000000..22f37a7 --- /dev/null +++ 
b/modules/nf-core/tabix/bgziptabix/main.nf @@ -0,0 +1,48 @@ +process TABIX_BGZIPTABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi + tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz + tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def args2 = task.ext.args2 ?: '' + def index = args2.contains("-C ") || args2.contains("--csi") ? 
"csi" : "tbi" + """ + echo "" | gzip > ${prefix}.${input.getExtension()}.gz + touch ${prefix}.${input.getExtension()}.gz.${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml new file mode 100644 index 0000000..806fbc1 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -0,0 +1,65 @@ +name: tabix_bgziptabix +description: bgzip a sorted tab-delimited genome file and then create tabix index +keywords: + - bgzip + - compress + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Sorted tab-delimited genome file +output: + - gz_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gz": + type: file + description: bgzipped tab-delimited genome file + pattern: "*.gz" + - "*.tbi": + type: file + description: tabix index file + pattern: "*.tbi" + - gz_csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.gz": + type: file + description: bgzipped tab-delimited genome file + pattern: "*.gz" + - "*.csi": + type: file + description: csi index file + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test new file mode 100644 index 0000000..4d4130d --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + + name "Test Process TABIX_BGZIPTABIX" + script "modules/nf-core/tabix/bgziptabix/main.nf" + process "TABIX_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgziptabix" + + test("sarscov2_bed_tbi") { + config "./tabix_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'tbi_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_tbi[0][1]).name + ).match("tbi_test") + } + ) + } + } + + test("sarscov2_bed_csi") { + config "./tabix_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'csi_test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_csi[0][1]).name + ).match("csi_test") + } + ) + } + + } + + test("sarscov2_bed_csi_stub") { + config "./tabix_csi.config" + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_csi[0][1]).name + ).match("csi_stub") + } + ) + } + + } + + test("sarscov2_bed_tbi_stub") { + config "./tabix_tbi.config" + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_tbi[0][1]).name + ).match("tbi_stub") + } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 0000000..fb87799 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,206 @@ +{ + "sarscov2_bed_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ], + "gz_csi": [ + + ], + "gz_tbi": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c" + ] + ], + "versions": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:29:16.053817543" + }, + "sarscov2_bed_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" + ] + ], + "2": [ + 
"versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ], + "gz_csi": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5" + ] + ], + "gz_tbi": [ + + ], + "versions": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T11:29:27.667745444" + }, + "csi_test": { + "content": [ + "csi_test.bed.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:51:00.548801" + }, + "sarscov2_bed_tbi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ], + "gz_csi": [ + + ], + "gz_tbi": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-25T14:45:18.533169949" + }, + "csi_stub": { + "content": [ + "test.bed.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:51:09.218454" + }, + "tbi_stub": { + "content": [ + "test.bed.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-25T14:45:18.550930179" + }, + "tbi_test": { + "content": [ + "tbi_test.bed.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-02-19T14:50:51.579654" + }, + "sarscov2_bed_csi_stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" 
+ ] + ], + "2": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ], + "gz_csi": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gz_tbi": [ + + ], + "versions": [ + "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-25T14:44:19.786135972" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config new file mode 100644 index 0000000..fb41a31 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config new file mode 100644 index 0000000..c1915dc --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tags.yml b/modules/nf-core/tabix/bgziptabix/tests/tags.yml new file mode 100644 index 0000000..5052b4d --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgziptabix: + - "modules/nf-core/tabix/bgziptabix/**" diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 0000000..017c259 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf new file mode 100644 index 0000000..13acd67 --- /dev/null +++ 
b/modules/nf-core/tabix/tabix/main.nf @@ -0,0 +1,45 @@ +process TABIX_TABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(tab) + + output: + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + tabix \\ + --threads $task.cpus \\ + $args \\ + $tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${tab}.tbi + touch ${tab}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml new file mode 100644 index 0000000..7864832 --- /dev/null +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -0,0 +1,58 @@ +name: tabix_tabix +description: create tabix index from a sorted bgzip tab-delimited genome file +keywords: + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file compressed with bgzip + pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" +output: + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: tabix index file + pattern: "*.{tbi}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: coordinate sorted index file + pattern: "*.{csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test new file mode 100644 index 0000000..102b0d7 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -0,0 +1,136 @@ +nextflow_process { + + name "Test Process TABIX_TABIX" + script "modules/nf-core/tabix/tabix/main.nf" + process "TABIX_TABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/tabix" + + test("sarscov2_bedgz_tbi") { + config "./tabix_bed.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_bed' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name + ).match() } + ) + } + } + + test("sarscov2_gff_tbi") { + config "./tabix_gff.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_gff' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert 
process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name).match() } + ) + } + + } + + test("sarscov2_vcf_tbi") { + config "./tabix_vcf_tbi.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_vcf' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2_vcf_csi") { + config "./tabix_vcf_csi.config" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.csi[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2_vcf_csi_stub") { + config "./tabix_vcf_csi.config" + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi_stub' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.csi[0][1]).name + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap new file mode 100644 index 0000000..c2b9ed0 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -0,0 +1,212 @@ +{ + "sarscov2_gff_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" 
+ ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "genome.gff3.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:25.653807564" + }, + "sarscov2_bedgz_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.bed.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:09.754082161" + }, + "sarscov2_vcf_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:40.042648294" + }, + "sarscov2_vcf_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:07:08.700367261" + }, + "sarscov2_vcf_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" + ] + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" + ] + ], + "tbi": [ + + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:55.362067748" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_bed.config b/modules/nf-core/tabix/tabix/tests/tabix_bed.config new file mode 100644 index 0000000..7ff0590 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_bed.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p bed' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_gff.config b/modules/nf-core/tabix/tabix/tests/tabix_gff.config new file mode 100644 index 0000000..20c0a1e --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_gff.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p gff' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config new file mode 100644 index 0000000..eb4f2d7 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config new file mode 100644 index 0000000..2774c8a --- 
/dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tags.yml b/modules/nf-core/tabix/tabix/tests/tags.yml new file mode 100644 index 0000000..6eda065 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/tabix: + - "modules/nf-core/tabix/tabix/**" diff --git a/nextflow.config b/nextflow.config index ab17cf4..8b77349 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,41 +9,40 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null + + // Main options + fasta = null + genome = null + + // Knowledgebase options + databases = 'cgi, civic' // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - // MultiQC options - multiqc_config = null - multiqc_title = null - max_multiqc_email_size = '25.MB' - // Boilerplate options - outdir = null + outdir = 'results' tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false + version = false // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null // Max resource options // Defaults only, expecting to be overwritten @@ -51,6 +50,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options 
+ validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -64,63 +70,120 @@ try { } // Load nf-core/variantmtb custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! // try { // includeConfig "${params.custom_config_base}/pipeline/variantmtb.config" // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/variantmtb profiles: ${params.custom_config_base}/pipeline/variantmtb.config") // } - - profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - params.enable_conda = true + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true - docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false 
podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + charliecloud.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + actionability { includeConfig 'conf/actionability_filters.config' } +} + +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -129,7 +192,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` 
that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -144,32 +206,36 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/variantmtb' - author = 'SusiJo' + author = """SusiJo, mapo9, HomoPolyethylen""" homePage = 'https://github.com/nf-core/variantmtb' - description = 'This pipeline queries variant databases to investigate the biological and predictive relevance of tumor variants' + description = """This pipeline queries variant databases to investigate the prognostic, diagnostic and predictive relevance of tumor variants""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '1.0dev' + nextflowVersion = '!>=23.04.0' + version = '1.0.0' + doi = '' } // Load modules.config for DSL2 module specific options @@ -207,3 +273,8 @@ def check_max(obj, type) { } } } + +// set conda channels 
+conda { + createOptions = "-c conda-forge -c bioconda" +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 9c3857e..c64727b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/variantmtb/master/nextflow_schema.json", "title": "nf-core/variantmtb pipeline parameters", - "description": "This pipeline queries variant databases to investigate the biological and predictive relevance of tumor variants", + "description": "This pipeline queries variant databases to investigate the prognostic, diagnostic and predictive relevance of tumor variants", "type": "object", "definitions": { "input_output_options": { @@ -15,9 +15,10 @@ "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/variantmtb/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -26,7 +27,8 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "results" }, "email": { "type": "string", @@ -34,11 +36,6 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. 
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" - }, - "multiqc_title": { - "type": "string", - "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", - "fa_icon": "fas fa-file-signature" } } }, @@ -51,26 +48,20 @@ "genome": { "type": "string", "description": "Name of iGenomes reference.", + "enum": ["hg19", "GRCh37", "hg38", "GRCh38"], "fa_icon": "fas fa-book", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "description": "Path to FASTA reference file. Must be provided when querying CIViC.", + "help_text": "Path to FASTA reference file. 
Must be provided when querying CIViC.", "fa_icon": "far fa-file-code" }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt", - "hidden": true - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -80,6 +71,23 @@ } } }, + "knowledgebase_options": { + "title": "Knowledgebase options", + "type": "object", + "description": "Which knowledgebase to query and related options.", + "default": "", + "properties": { + "databases": { + "type": "string", + "description": "Databases which should be queried.", + "default": "cgi, civic", + "pattern": "^(cgi|civic)((,| )+(cgi|civic))*$", + "fa_icon": "fas fa-database" + } + }, + "required": ["databases"], + "fa_icon": "fas fa-database" + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -157,7 +165,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" } @@ -176,6 +184,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -199,30 +213,23 @@ "fa_icon": "fas fa-remove-format", "hidden": true }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true - }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", "hidden": true }, - "multiqc_config": { + "hook_url": { "type": "string", - "description": "Custom config file to supply to MultiQC.", - "fa_icon": "fas fa-cog", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", + "default": "results/pipeline_info", "fa_icon": "fas fa-cogs", "hidden": true }, @@ -233,18 +240,26 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." 
}, - "enable_conda": { + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { "type": "boolean", - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } @@ -256,6 +271,9 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/knowledgebase_options" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..5e4e3b7 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,6 @@ +config { + testsDir "." + workDir "testing/.nf-test" + configFile "conf/test.config" + profile "test" +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5611062 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.ruff] +line-length = 120 +target-version = "py38" +cache-dir = "~/.cache/ruff" + +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/utils_nfcore_variantmtb_pipeline/main.nf b/subworkflows/local/utils_nfcore_variantmtb_pipeline/main.nf new file mode 100644 index 0000000..d83c1ee --- /dev/null +++ 
b/subworkflows/local/utils_nfcore_variantmtb_pipeline/main.nf @@ -0,0 +1,250 @@ +// +// Subworkflow with functionality specific to the nf-core/variantmtb pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump 
pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary (this workflow takes no `multiqc_report` input, so no report is attached) + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, null) + } 
+ + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // nf-core: Optionally add in-text citation tools to this list. 
+ // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_list = ["Tools used in the workflow included:"] + citation_list += params.databases.contains("cgi") ? "CGI (Tamborero et al. 2018)" : "" + citation_list += params.databases.contains("civic") ? "CIViC (Griffith et al. 2017)" : "" + citation_list += params.databases.contains("civic") ? "CIViCpy (Wagner et al. 2020)" : "" + def citation_text = citation_list.join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_list = [] + reference_list += params.databases.contains("cgi") ? "<li>Tamborero, D., Rubio-Perez, C., Deu-Pons, J., Schroeder, M. P., Vivancos, A., Rovira, A., ... & Lopez-Bigas, N. (2018). Cancer Genome Interpreter annotates the biological and clinical relevance of tumor alterations. Genome medicine, 10, 1-8. https://doi.org/10.1186/s13073-018-0531-8</li>" : "" + reference_list += params.databases.contains("civic") ? "<li>Griffith, M., Spies, N. C., Krysiak, K., McMichael, J. F., Coffman, A. C., Danos, A. M., ... & Griffith, O. L. (2017). CIViC is a community knowledgebase for expert crowdsourcing the clinical interpretation of variants in cancer. Nature genetics, 49(2), 170-174. https://doi.org/10.1038/ng.3774</li>" : "" + reference_list += params.databases.contains("civic") ? "<li>Wagner, A. H., Kiwala, S., Coffman, A. C., McMichael, J. F., Cotto, K. C., Mooney, T. B., ... & Griffith, M. (2020). CIViCpy: a python software development and analysis toolkit for the CIViC knowledgebase. JCO Clinical Cancer Informatics, 4, 245-253. https://doi.org/10.1200/CCI.19.00127</li>" : "" + + def reference_text = reference_list.join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +// Function to get list of [ meta, inputfile, genome, filetype, compressed_check ] +def create_input_channel(LinkedHashMap row) { + + // check if input file is compressed + def compressed_check = file(row.filename).extension == "gz" ? "compressed" : "uncompressed" + + // create meta map + def meta = [:] + meta.id = row.sample + + // add path(s) of the input file(s) to the meta map + def input_meta = [ meta, file(row.filename), row.genome, row.filetype, compressed_check ] + + return input_meta +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..b17b877 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,116 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + 
log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + 
log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn( + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + " Please refer to https://bioconda.github.io/\n" + " The observed channel order is \n" + " ${channels}\n" + " but the following channel order is required:\n" + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + ) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. 
+ pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert 
workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..a09572e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - 
subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..5cb7baf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,462 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Handling multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + // Each group becomes a heading followed by a definition list; the leading spaces keep the HTML inside the YAML `data: |` block scalar written below + summary_section += "    <p style=\"font-size:110%\"><b>${group}</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "        <dt>${param}</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" + } + summary_section += "    </dl>\n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSI colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ?
'' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? 
'' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + } + mqc_report = mqc_report[0] + } + } + } + catch (Exception all) { + if (multiqc_report) { + log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = 
workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + // Abort the sendmail attempt so that the catch block below falls back to `mail` + throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception all) { + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on
completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ 
+--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new 
file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert 
snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + 
"reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + 
"iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git 
a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 0000000..2398c62 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,61 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-VALIDATION PLUGIN +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline +
pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug("Using schema file: ${schema_filename}") + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info(pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text) + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info(pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text) + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params) { + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 0000000..3d4a6b0 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. 
"nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 0000000..c50b1fb --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + 
schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('pre-help-text') 
} }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = false + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..7626c1c --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + 
"description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 0000000..60b1cff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tests/csv/input.csv b/tests/csv/input.csv new file mode 100644 index 0000000..b83c8ff --- /dev/null +++ b/tests/csv/input.csv @@ -0,0 +1,2 @@ +sample,filename,genome,filetype +test_sample,tests/test_files/test.vcf,GRCh37,mutations diff --git a/tests/csv/input_SNPs-CNAs.csv b/tests/csv/input_SNPs-CNAs.csv new file mode 100644 index 0000000..c2f1111 --- /dev/null +++ b/tests/csv/input_SNPs-CNAs.csv @@ -0,0 +1,3 @@ +sample,filename,genome,filetype +test_sample,tests/test_files/test.vcf.gz,GRCh37,mutations +test_sample,tests/test_files/cnas.tsv,GRCh37,cnas diff --git a/tests/csv/input_cancer.csv b/tests/csv/input_cancer.csv new file mode 100644 index 0000000..33f57ef --- /dev/null +++ b/tests/csv/input_cancer.csv 
@@ -0,0 +1,4 @@ +sample,filename,genome,filetype,cgi_cancer,civic_cancer +test_sample-A,tests/test_files/test.vcf,GRCh37,mutations,Breast adenocarcinoma,breast adenocarcinoma +test_sample-B,tests/test_files/test.vcf,GRCh37,mutations,Cholangiocarcinoma,Cholangiocarcinoma +test_sample-C,tests/test_files/test.vcf,GRCh37,mutations,Brain,Brain Stem Cancer diff --git a/tests/test_default.yml b/tests/test_default.yml new file mode 100644 index 0000000..e6fda2a --- /dev/null +++ b/tests/test_default.yml @@ -0,0 +1,11 @@ +- name: Run default pipeline + command: nextflow run main.nf -profile test,docker + tags: + - default + - preprocessing + files: + - path: results/test_sample/test_sample_civic/metadata.txt + - path: results/test_sample/test_sample_civic/test_sample_civic.civic_results.tsv + - path: results/test_sample/test_sample_civic/vcf_files/test_sample_civic.filtered_variants.vcf + - path: results/test_sample/test_sample_civic/vcf_files/test_sample_civic.removed_variants.vcf + - path: results/test_sample/versions.yml diff --git a/tests/test_files/cnas.tsv b/tests/test_files/cnas.tsv new file mode 100644 index 0000000..9c8993b --- /dev/null +++ b/tests/test_files/cnas.tsv @@ -0,0 +1,4 @@ +sample gene cna +test_1 ERBB2 amp +test_1 TP53 del +test_1 EGFR amp diff --git a/tests/test_files/test.vcf b/tests/test_files/test.vcf new file mode 100644 index 0000000..121f926 --- /dev/null +++ b/tests/test_files/test.vcf @@ -0,0 +1,59 @@ +##fileformat=VCFv4.2 +##source=DoCM - The Database of Curated Mutations +##fileDate=20221026 +##reference=GRCh37 +##INFO= +##INFO= +##VEP="v106" time="2023-05-16 11:52:20" cache="/.vep/homo_sapiens/106_GRCh37" ensembl=106.f4b50c6 ensembl-funcgen=106.027e023 ensembl-io=106.6eafdaa ensembl-variation=106.2aa7a5d 1000genomes="phase3" COSMIC="92" ClinVar="202012" HGMD-PUBLIC="20204" assembly="GRCh37.p13" dbSNP="154" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2" +##INFO= +#CHROM POS ID REF ALT 
QUAL FILTER INFO +2 29432664 . C A . . DISEASE=neuroblastoma,breast_cancer,neuroblastoma,breast_cancer,neuroblastoma,breast_cancer;PMID=26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|ALK|ENSG00000171094|Transcript|ENST00000389048|protein_coding|25/29||||4731/6220|3824/4863|1275/1620|R/L|cGa/cTa|rs113994087&CM085230&COSV66555567&COSV66558183||-1||SNV|HGNC|427|YES|||||CCDS33172.1|ENSP00000373700|ALK_HUMAN|Q580I3_HUMAN|UPI00001684DA||1|deleterious(0)|probably_damaging(1)|Gene3D:1.10.510.10&Pfam:PF07714&PROSITE_profiles:PS50011&PANTHER:PTHR24416&PANTHER:PTHR24416:SF276&SMART:SM00219&Superfamily:SSF56112|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&0&1&1|1&1&1&1|26619011&18724359&18923523&18923524&18923525&20301782&21242967&21838707&22071890&22072639&30755224||||| +2 158626897 . C A . . DISEASE=brain_stem_glioma,gastric_adenocarcinoma,brain_stem_glioma,gastric_adenocarcinoma,brain_stem_glioma,gastric_adenocarcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|ACVR1|ENSG00000115170|Transcript|ENST00000263640|protein_coding|7/11||||1203/3045|773/1530|258/509|R/M|aGg/aTg|rs1057519875&COSV55118829||-1||SNV|HGNC|171|YES|||||CCDS2206.1|ENSP00000263640|ACVR1_HUMAN|D3DPA4_HUMAN&Q53SV1_HUMAN&Q53SF4_HUMAN&F5GY91_HUMAN&C9JW28_HUMAN&C9JHJ7_HUMAN&C9J1R3_HUMAN|UPI000000163F||1|deleterious(0)|probably_damaging(0.997)|Gene3D:3.30.200.20&Pfam:PF00069&PROSITE_profiles:PS50011&PANTHER:PTHR23255&PANTHER:PTHR23255:SF56&SMART:SM00220&Superfamily:SSF56112|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1|1&1|26619011||||| +2 198267359 . C G . . 
DISEASE=acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia;PMID=23395771,23395771,23395771,23395771,23395771,23395771,23395771,23395771;CSQ=G|missense_variant|MODERATE|SF3B1|ENSG00000115524|Transcript|ENST00000335508|protein_coding|14/25||||2090/6526|1998/3915|666/1304|K/N|aaG/aaC|rs377023736&COSV59205777&COSV59205799||-1||SNV|HGNC|10768|YES|||||CCDS33356.1|ENSP00000335321|SF3B1_HUMAN|Q9NTB4_HUMAN&F8WC19_HUMAN|UPI000013D493||1|deleterious(0)|probably_damaging(0.992)|Gene3D:1.25.10.10&PANTHER:PTHR12097&Superfamily:SSF48371||||||||0|0.0001163|3.979e-05|0|5.785e-05|0|0|0|6.159e-05|0|3.267e-05|0.0001163|EA|1KG_ALL:G:NA|likely_pathogenic|0&1&1|1&1&1|29641532&23395771&21909114&21995386|||||,G|upstream_gene_variant|MODIFIER|SNORA4|ENSG00000202434|Transcript|ENST00000365564|snoRNA||||||||||rs377023736&COSV59205777&COSV59205799|2083|1||SNV|RFAM||YES||||||||||||||||||||||0|0.0001163|3.979e-05|0|5.785e-05|0|0|0|6.159e-05|0|3.267e-05|0.0001163|EA|1KG_ALL:G:NA|likely_pathogenic|0&1&1|1&1&1|29641532&23395771&21909114&21995386||||| +2 209113112 . C A . . 
DISEASE=acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,brain_glioma,glioblastoma_multiforme,acute_myeloid_leukemia,skin_melanoma,prostate_adenocarcinoma,hepatocellular_carcinoma,multiple_myeloma,colorectal_cancer,lung_adenocarcinoma,brain_stem_glioma,myelodysplastic_syndrome,medulloblastoma,breast_cancer,urinary_bladder_urothelial_carcinoma,salivary_gland_adenoid_cystic_carcinoma,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,brain_glioma,glioblastoma_multiforme,acute_myeloid_leukemia,skin_melanoma,prostate_adenocarcinoma,hepatocellular_carcinoma,multiple_myeloma,colorectal_cancer,lung_adenocarcinoma,brain_stem_glioma,myelodysplastic_syndrome,medulloblastoma,breast_cancer,urinary_bladder_urothelial_carcinoma,salivary_gland_adenoid_cystic_carcinoma,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,brain_glioma,glioblastoma_multiforme,acute_myeloid_leukemia,skin_melanoma,prostate_adenocarcinoma,hepatocellular_carcinoma,multiple_myeloma,colorectal_cancer,lung_adenocarcinoma,brain_stem_glioma,myelodysplastic_syndrome,medulloblastoma,breast_cancer,urinary_bladder_urothelial_carcinoma,salivary_gland_adenoid_cystic_carcinoma;PMID=22160010,22397365,22417203,22898539,22160010,22397365,22417203,22898539,22160010,22397365,22417203,22898539,22160010,22397365,22417203,22898539,22160010,22397365,22417203,22898539,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,266190
11,26619011,26619011,26619011,26619011,26619011,22160010,22397365,22417203,22898539,22160010,22397365,22417203,22898539,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,22160010,22397365,22417203,22898539,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|IDH1|ENSG00000138413|Transcript|ENST00000415913|protein_coding|4/10||||777/2441|395/1245|132/414|R/L|cGt/cTt|rs121913500&CM1310533&COSV61615239&COSV61615420||-1||SNV|HGNC|5382|YES|||||CCDS2381.1|ENSP00000390265|IDHC_HUMAN|Q6FHQ6_HUMAN&Q0QER2_HUMAN&C9JLU6_HUMAN&C9JJE5_HUMAN&C9J4N6_HUMAN&B4DXS4_HUMAN|UPI000012D1B4||1|deleterious_low_confidence(0)|benign(0.049)|Superfamily:SSF53659&PIRSF:PIRSF000108&Pfam:PF00180&Gene3D:3.40.718.10&TIGRFAM:TIGR00127&PANTHER:PTHR11822|||||||||||||||||||||1KG_ALL:A:NA|pathogenic&likely_pathogenic|0&0&1&1|1&1&1&1|25157968&26619011&22160010&22397365&22417203&24606448&22898539&19657110&18772396&19798509&19818334&21446021&23558169&25043048&29066617||||| +3 12626123 . G C . . 
DISEASE=Noonan_syndrome,Noonan_syndrome,Noonan_syndrome,Noonan_syndrome,Noonan_syndrome,Noonan_syndrome;PMID=17603482,17603489,17603482,17603489,17603482,17603489;CSQ=C|missense_variant|MODERATE|RAF1|ENSG00000132155|Transcript|ENST00000251849|protein_coding|17/17||||2277/3300|1837/1947|613/648|L/V|Cta/Gta|rs80338797&CM073302&COSV50103946||-1||SNV|HGNC|9829|YES|||||CCDS2612.1|ENSP00000251849|RAF1_HUMAN|L7RRS6_HUMAN|UPI0000049CFC||1|deleterious(0.01)|benign(0.205)|Gene3D:1.10.510.10&PANTHER:PTHR23257&PANTHER:PTHR23257:SF42|||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&0&1|1&1&1|24033266&17603482&20301557&19953625&17603483&20052757&17603489&19933846|||||,C|downstream_gene_variant|MODIFIER|MKRN2|ENSG00000075975|Transcript|ENST00000170447|protein_coding||||||||||rs80338797&CM073302&COSV50103946|911|1||SNV|HGNC|7113|YES|||||CCDS33702.1|ENSP00000170447|MKRN2_HUMAN||UPI000007116E||||||||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&0&1|1&1&1|24033266&17603482&20301557&19953625&17603483&20052757&17603489&19933846||||| +3 41266125 . C A . . 
DISEASE=hepatocellular_carcinoma,colorectal_cancer,uterine_corpus_endometrial_carcinoma,skin_melanoma,prostate_adenocarcinoma,pancreas_adenocarcinoma,lung_adenocarcinoma,adrenocortical_carcinoma,hepatocellular_carcinoma,colorectal_cancer,uterine_corpus_endometrial_carcinoma,skin_melanoma,prostate_adenocarcinoma,pancreas_adenocarcinoma,lung_adenocarcinoma,adrenocortical_carcinoma,hepatocellular_carcinoma,colorectal_cancer,uterine_corpus_endometrial_carcinoma,skin_melanoma,prostate_adenocarcinoma,pancreas_adenocarcinoma,lung_adenocarcinoma,adrenocortical_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|CTNNB1|ENSG00000168036|Transcript|ENST00000349496|protein_coding|3/15||||402/3729|122/2346|41/781|T/N|aCc/aAc|rs121913413&COSV62688008&COSV62688560&COSV62714333||1||SNV|HGNC|2514|YES|||||CCDS2694.1|ENSP00000344456|CTNB1_HUMAN|G9GAG7_HUMAN&E9PDF9_HUMAN&E7EV28_HUMAN&E7EMJ5_HUMAN&C9IZ65_HUMAN&B4DGU4_HUMAN|UPI000012862F||1|deleterious(0.03)|possibly_damaging(0.468)|PANTHER:PTHR23315&PANTHER:PTHR23315:SF53|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011&10192393||||| +3 41266136 . T G . . 
DISEASE=brain_cancer,adrenal_cortical_adenoma,brain_cancer,adrenal_cortical_adenoma,brain_cancer,adrenal_cortical_adenoma,brain_cancer,adrenal_cortical_adenoma,brain_cancer,adrenal_cortical_adenoma,brain_cancer,adrenal_cortical_adenoma,brain_cancer,adrenal_cortical_adenoma;PMID=25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968,25157968;CSQ=G|missense_variant|MODERATE|CTNNB1|ENSG00000168036|Transcript|ENST00000349496|protein_coding|3/15||||413/3729|133/2346|45/781|S/A|Tct/Gct|rs121913407&COSV62687880&COSV62689248&COSV62696859||1||SNV|HGNC|2514|YES|||||CCDS2694.1|ENSP00000344456|CTNB1_HUMAN|G9GAG7_HUMAN&E9PDF9_HUMAN&E7EV28_HUMAN&E7EMJ5_HUMAN&C9IZ65_HUMAN&B4DGU4_HUMAN|UPI000012862F||1|deleterious(0.02)|benign(0.403)|PANTHER:PTHR23315&PANTHER:PTHR23315:SF53|||||||||||||||||||||1KG_ALL:G:NA|likely_pathogenic|0&1&1&1|1&1&1&1|25157968&26619011&15133491&9065403&10027390&11351304&11930117&11950921&12124804&10435629||||| +3 178921552 . A C . . 
DISEASE=breast_cancer,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,colorectal_cancer,uterine_carcinosarcoma,prostate_adenocarcinoma,lung_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,glioblastoma_multiforme,salivary_gland_adenoid_cystic_carcinoma,breast_cancer,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,colorectal_cancer,uterine_carcinosarcoma,prostate_adenocarcinoma,lung_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,glioblastoma_multiforme,salivary_gland_adenoid_cystic_carcinoma,breast_cancer,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,colorectal_cancer,uterine_carcinosarcoma,prostate_adenocarcinoma,lung_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,glioblastoma_multiforme,salivary_gland_adenoid_cystic_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=C|missense_variant|MODERATE|PIK3CA|ENSG00000121879|Transcript|ENST00000263967|protein_coding|5/21||||1191/9093|1034/3207|345/1068|N/T|aAt/aCt|rs1057519938&CM1610987&COSV55881772&COSV55902438&COSV55902706||1||SNV|HGNC|8975|YES|||||CCDS43171.1|ENSP00000263967|PK3CA_HUMAN|Q4LE51_HUMAN&C9JAM9_HUMAN&C9J951_HUMAN|UPI000013D494||1|deleterious(0.05)|probably_damaging(0.997)|Gene3D:2.60.40.150&PROSITE_profiles:PS51547&PANTHER:PTHR10048&PANTHER:PTHR10048:SF54&SMART:SM00142&Superfamily:SSF49562|||||||||||||||||||||1KG_ALL:C:NA|likely_pathogenic|0&0&1&1&1|1&1&1&1&1|26619011||||| +3 178928226 . C T . . 
DISEASE=Merkel_cell_carcinoma,Merkel_cell_carcinoma,Merkel_cell_carcinoma,Merkel_cell_carcinoma,Merkel_cell_carcinoma,Merkel_cell_carcinoma;PMID=26466009,26466009,26466009,26466009,26466009,26466009;CSQ=T|missense_variant|MODERATE|PIK3CA|ENSG00000121879|Transcript|ENST00000263967|protein_coding|9/21||||1569/9093|1412/3207|471/1068|P/L|cCa/cTa|rs1057519872&COSV55911123||1||SNV|HGNC|8975|YES|||||CCDS43171.1|ENSP00000263967|PK3CA_HUMAN|Q4LE51_HUMAN&C9JAM9_HUMAN&C9J951_HUMAN|UPI000013D494||1|deleterious(0)|possibly_damaging(0.824)|Gene3D:2.60.40.150&Pfam:PF00792&PROSITE_profiles:PS51547&PANTHER:PTHR10048&PANTHER:PTHR10048:SF54&Superfamily:SSF49562|||||||||||||||||||||1KG_ALL:T:NA|not_provided|0&1|1&1|26466009||||| +3 178936091 . G A . . DISEASE=colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-
small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinom
a,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,breast_cancer,colorectal_cancer,head_and_neck_squamous_cell_carcinoma,cervix_carcinoma,urinary_bladder_urothelial_carcinoma,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,lung_squamous_cell_carcinoma,lung_adenocarcinoma,uterine_carcinosarcoma,glioblastoma_multiforme,esophageal_carcinoma,skin_melanoma,prostate_adenocarcinoma,brain_glioma,renal_clear_cell_carcinoma,brain_stem_glioma,gallbladder_carcinoma,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,nasopharynx_carcinoma,lung_small_cell_carcinoma,hepatocellular_carcinoma,papillary_renal_cell_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcino
ma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,colorectal_cancer,breast_cancer,colorectal_cancer,head_and_neck_squamous_cell_carcinoma,cervix_carcinoma,urinary_bladder_urothelial_carcinoma,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,lung_squamous_cell_carcinoma,lung_adenocarcinoma,uterine_carcinosarcoma,glioblastoma_multiforme,esophageal_carcinoma,skin_melanoma,prostate_adenocarcinoma,brain_glioma,renal_clear_cell_carcinoma,brain_stem_glioma,gallbladder_carcinoma,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,nasopharynx_carcinoma,lung_small_cell_carcinoma,hepatocellular_carcinoma,papillary_renal_cell_carcinoma,lung_adenocarcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,breast_cancer,breast_cancer,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,breast_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,breast_cancer,ovarian_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,breast_cancer,colorectal_cancer,head_and_neck_squamous_cell_carcinoma,cervix_carcinoma,urinary_bladder_urothelial_carcinoma,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,lung_squamous_cell_carcinoma,lung_adenocarcinoma,uterine_carcinosarcoma,glioblastoma_multiforme,esophageal_carcinoma,skin_melanoma,prostate_adenocarcinoma,brain_glioma,renal_clear_cell_carcinoma,brain_stem_glioma,gallbladder_carcinoma,pancreas_adenocarcinoma,ovarian_serous_cystadeno
carcinoma,nasopharynx_carcinoma,lung_small_cell_carcinoma,hepatocellular_carcinoma,papillary_renal_cell_carcinoma,lung_adenocarcinoma,colorectal_cancer;PMID=15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,23066039,23066039,25157968,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,23066039,25157968,23888070,19223544,18725974,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,23066039,25157968,23888070,19223544,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,23066039,25157968,23888070,19223544,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,1690622
7,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,23066039,25157968,23888070,23066039,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,25157968,23888070,19223544,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,16906227,15016963,15016963,15254419,15254419,15254419,15647370,15647370,15647370,15647370,15805248,16906227,16930767,18676830,18725974,18725974,18725974,19029981,19366826,19513541,19903786,20453058,20453058,20453058,20619739,21430269,22162582,22162582,22162589,22162589,22162589,22271473,23066039,25157968,23888070,19223544,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,16906227,19223544;CSQ=A|missense_variant|MODERATE|PIK3CA|ENSG00000121879|Transcript|ENST00000263967|protein_coding|10/21||||1790/9093|1633/3207|545/1068|E/K|Gag/Aag|rs104886003&CM126692&COSV55873239&COSV55878227||1||SNV|HGNC|8975|YES|||||CCDS43171.1|ENSP00000263967|PK3CA_HUMAN|Q4LE51_HUMAN&C9JAM9_HUMAN&C9J951_HUMAN|UPI000013D494||1|deleterious(0.02)|probably_damaging(0.909)|Gene3D:1.25.40.70&Pfam:PF00613&PROSITE_profiles:PS51545&PANTHER:PTHR10048&PANTHER:PTHR10048:SF54&SMART:SM00145&Superfamily:SSF48371||||||||||4.032e-06|0|0|0|0|0|8.879e-06|0|0|8.879e-06|gnomAD_NFE|1KG_ALL:A:NA|pathogenic&pathogenic/likely_pathogenic&not_provided&likely_pathogenic|0&0&1&1|1&1&1&1|30089490&25157968&26619011&20619739&21430269&15016
963&21264207&19029981&22729224&15254419&15647370&18725974&19366826&19903786&20453058&22162582&22162589&15805248&18676830&15608678&22729223&15520168&16906227&17673550&19223544&19513541&22271473&23408298&23888070&23946963&31949278||||| +4 1803568 . C G . . DISEASE=bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma,bladder_carcinoma;PMID=19381019,19381019,25157968,19381019,25157968,19381019,25157968,19381019,25157968,19381019,25157968,19381019,25157968,19381019,25157968;CSQ=G|missense_variant|MODERATE|FGFR3|ENSG00000068078|Transcript|ENST00000340107|protein_coding|7/18||||1002/4293|746/2427|249/808|S/C|tCc/tGc|rs121913483&CM950470&COSV53390026||1||SNV|HGNC|3690|YES|||||CCDS54706.1|ENSP00000339824|FGFR3_HUMAN|Q8NI16_HUMAN|UPI000002A9AC||1|deleterious(0.01)|probably_damaging(0.978)|PIRSF:PIRSF000628&PANTHER:PTHR24416&PANTHER:PTHR24416:SF128||||||||||0|0|0|0|0|0|0|0|0|0|gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS|1KG_ALL:G:NA|likely_pathogenic&pathogenic|0&0&1|1&1&1|25741868&25157968&26619011&30616520&10471491&20301540&19381019&15772091&8589699&29700339||||| +4 55593610 . T A . . 
DISEASE=melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,gastrointestinal_stromal_tumor,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,melanoma,gastrointestinal_stromal_tumor;PMID=7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,23582185,21689
725,17259998,25157968,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,23582185,21689725,17259998,25157968,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,23582185,21689725,17259998,25157968,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,23582185,21689725,17259998,25157968,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,25157968,23582185,21689725,17259998,16731599,16046538,23582185,21689725,17259998,25157968,16731599,7530509,9438854,11276010,15790786,16908931,17372901,18936790,18980976,19671763,19812602,21642685,21690468,16046538,23582185,21689725,17259998,25157968,16731599;CSQ=A|missense_variant|MODERATE|KIT|ENSG00000157404|Transcript|ENST00000288135|protein_coding|11/21||||1773/5186|1676/2931|559/976|V/D|gTt/gAt|rs121913517&CM013551&COSV55386973&COSV55388782&COSV55393324||1||SNV|HGNC|6342|YES|||||CCDS3496.1|ENSP00000288135|KIT_HUMAN|Q8TCG9_HUMAN|UPI000003F17D||1|deleterious(0)|probably_damaging(0.996)|PANTHER:PTHR24416&PANTHER:PTHR24416:SF46&Gene3D:3.30.200.20&PIRSF:PIRSF000615&PIRSF:PIRSF500951|||||||||||||||||||||1KG_ALL:A:NA|pathogenic&pathogenic/likely_pathogenic|0&0&1&1&1|1&1&1&1&1|25157968&9438854&7530509&11276010&12960119&15790786&16731599&17372901&18936790&18980976&19671763&19812602&21642685&21690468&22357254&17259998&21689725&23582185&16908931&11505412&16046538&20736294&21159146&24531699&24661573&24755198||||| +5 176522416 . C A . . 
DISEASE=rhabdomyosarcoma,rhabdomyosarcoma,rhabdomyosarcoma,rhabdomyosarcoma,rhabdomyosarcoma,rhabdomyosarcoma,rhabdomyosarcoma;PMID=24124571,24124571,24124571,24124571,24124571,24124571,24124571;CSQ=A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00002023482|promoter||||||||||rs1057519792&COSV52800481&COSV52801581||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1|1&1&1|24124571|||||,A|missense_variant|MODERATE|FGFR4|ENSG00000160867|Transcript|ENST00000292408|protein_coding|12/18||||1850/3122|1605/2409|535/802|N/K|aaC/aaA|rs1057519792&COSV52800481&COSV52801581||1||SNV|HGNC|3691|YES|||||CCDS4410.1|ENSP00000292408|FGFR4_HUMAN|G3JVM2_HUMAN&E7EWF4_HUMAN&D6RJD4_HUMAN&D6RG06_HUMAN&D6R9V0_HUMAN|UPI000012A72D||1|deleterious(0)|probably_damaging(1)|PROSITE_profiles:PS50011&PANTHER:PTHR24416&PANTHER:PTHR24416:SF312&Gene3D:3.30.200.20&Pfam:PF07714&SMART:SM00219&PIRSF:PIRSF000628&Superfamily:SSF56112|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1|1&1&1|24124571||||| +6 152332832 . G C . . 
DISEASE=breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer;PMID=24185512,24185510,24398047,24185512,24185510,24398047,24185512,24185510,24398047,24185512,24185510,24398047,24185512,24185510,24398047,24185512,24185510,24398047,24185512,24185510,24398047;CSQ=C|missense_variant|MODERATE|ESR1|ENSG00000091831|Transcript|ENST00000440973|protein_coding|7/10||||1508/6466|1138/1788|380/595|E/Q|Gaa/Caa|rs1057519827&COSV52782264||1||SNV|HGNC|3467|YES|||||CCDS5234.1|ENSP00000405330|ESR1_HUMAN|Q9UE35_HUMAN&Q9UBT1_HUMAN&Q5T5H8_HUMAN&K7R989_HUMAN&K4Q331_HUMAN&K4PXX0_HUMAN&K4PX46_HUMAN&G4XH65_HUMAN&E3WH18_HUMAN&C8CJL0_HUMAN&C7FFR5_HUMAN&B6DU67_HUMAN|UPI000004A328||1|deleterious(0)|probably_damaging(0.994)|PANTHER:PTHR24084:SF3&PANTHER:PTHR24084&Gene3D:1.10.565.10&Pfam:PF00104&SMART:SM00430&PIRSF:PIRSF500101&PIRSF:PIRSF002527&Superfamily:SSF48508&Prints:PR00398|||||||||||||||||||||1KG_ALL:C:NA|likely_pathogenic|0&1|1&1|24185512&24185510&24398047||||| +7 6426892 . C A . . 
DISEASE=skin_melanoma,head_and_neck_squamous_cell_carcinoma,uterine_corpus_endometrial_carcinoma,skin_squamous_cell_carcinoma,skin_melanoma,head_and_neck_squamous_cell_carcinoma,uterine_corpus_endometrial_carcinoma,skin_squamous_cell_carcinoma,skin_melanoma,head_and_neck_squamous_cell_carcinoma,uterine_corpus_endometrial_carcinoma,skin_squamous_cell_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|RAC1|ENSG00000136238|Transcript|ENST00000356142|protein_coding|2/7||||282/907|85/636|29/211|P/T|Cct/Act|rs1057519874&COSV61821563&COSV61823187||1||SNV|HGNC|9801|YES|||||CCDS5349.1|ENSP00000348461|RAC1_HUMAN|A4D2P0_HUMAN|UPI000002B20E||1|deleterious(0.01)|possibly_damaging(0.865)|Gene3D:3.40.50.300&Pfam:PF00071&Prints:PR00449&PROSITE_profiles:PS51420&PANTHER:PTHR24072&PANTHER:PTHR24072:SF107&SMART:SM00173&SMART:SM00174&SMART:SM00175&Superfamily:SSF52540&TIGRFAM:TIGR00231|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1|1&1&1|26619011&25056119||||| +7 55231430 . C T . . DISEASE=head_and_neck_cancer,head_and_neck_cancer,head_and_neck_cancer,head_and_neck_cancer,head_and_neck_cancer,head_and_neck_cancer,head_and_neck_cancer;PMID=23578570,23578570,23578570,23578570,23578570,23578570,23578570;CSQ=T|missense_variant|MODERATE|EGFR|ENSG00000146648|Transcript|ENST00000275493|protein_coding|14/28||||1813/9821|1636/3633|546/1210|P/S|Cca/Tca|rs1057519830&COSV51837871||1||SNV|HGNC|3236|YES|||||CCDS5514.1|ENSP00000275493|EGFR_HUMAN|Q75MF2_HUMAN&I3WA73_HUMAN&I3WA72_HUMAN&G9MC81_HUMAN&F1JTL6_HUMAN&E9PFD7_HUMAN&C9JYS6_HUMAN&A7VN06_HUMAN|UPI000003E750||1|tolerated(0.43)|benign(0.073)|Superfamily:SSF57184&PIRSF:PIRSF000619&SMART:SM00261&Gene3D:2.10.220.10&Pfam:PF14843&PANTHER:PTHR24416:SF91&PANTHER:PTHR24416|||||||||||||||||||||1KG_ALL:T:NA|likely_pathogenic|0&1|1&1|23578570||||| +7 55233043 . G T . . 
DISEASE=glioblastoma_multiforme,brain_glioma,brain_stem_glioma,glioblastoma_multiforme,brain_glioma,brain_stem_glioma,glioblastoma_multiforme,brain_glioma,brain_stem_glioma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=T|missense_variant|MODERATE|EGFR|ENSG00000146648|Transcript|ENST00000275493|protein_coding|15/28||||1970/9821|1793/3633|598/1210|G/V|gGa/gTa|rs139236063&COSV51769031&COSV51808387||1||SNV|HGNC|3236|YES|||||CCDS5514.1|ENSP00000275493|EGFR_HUMAN|Q75MF2_HUMAN&I3WA73_HUMAN&I3WA72_HUMAN&G9MC81_HUMAN&F1JTL6_HUMAN&E9PFD7_HUMAN&C9JYS6_HUMAN&A7VN06_HUMAN|UPI000003E750||1|deleterious(0)|probably_damaging(0.951)|Superfamily:SSF57184&PIRSF:PIRSF000619&SMART:SM00261&Gene3D:2.10.220.10&Pfam:PF14843&PANTHER:PTHR24416:SF91&PANTHER:PTHR24416||||||||||3.977e-06|0|0|0|0|0|8.793e-06|0|0|8.793e-06|gnomAD_NFE|1KG_ALL:T:NA|likely_pathogenic|0&1&1|1&1&1|26619011&17177598||||| +7 55241620 . G A . . DISEASE=endometrial_adenocarcinoma,endometrial_adenocarcinoma,endometrial_adenocarcinoma,endometrial_adenocarcinoma,endometrial_adenocarcinoma,endometrial_adenocarcinoma,endometrial_adenocarcinoma;PMID=22885469,22885469,22885469,22885469,22885469,22885469,22885469;CSQ=A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001726611|promoter_flanking_region||||||||||rs1057519794||||SNV|||||||||||||||||||||||||||||||||||||||likely_pathogenic||1|22885469|||||,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00002050855|CTCF_binding_site||||||||||rs1057519794||||SNV|||||||||||||||||||||||||||||||||||||||likely_pathogenic||1|22885469|||||,A|missense_variant|MODERATE|EGFR|ENSG00000146648|Transcript|ENST00000275493|protein_coding|18/28||||2245/9821|2068/3633|690/1210|E/K|Gag/Aag|rs1057519794||1||SNV|HGNC|3236|YES|||||CCDS5514.1|ENSP00000275493|EGFR_HUMAN|Q75MF2_HUMAN&I3WA73_HUMAN&I3WA72_HUMAN&G9MC81_HUMAN&F1JTL6_HUMAN&E9PFD7_HUMAN&C9JYS6_HUMAN&A7VN06_HUMAN|UPI000003E750||1|deleterious(0)|probably_damaging(0.996)|PIRSF:PIRSF000
619&Gene3D:1z9iA01&PANTHER:PTHR24416:SF91&PANTHER:PTHR24416&Low_complexity_(Seg):seg||||||||||||||||||||||likely_pathogenic||1|22885469||||| +7 55249005 . G T . . DISEASE=esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,esophageal_carcinoma,non-small_cell_lung_carcinoma;PMID=16707764,17653080,19147750,16707764,17653080,19147750,22753918,23102728,25157968,25157968,16707764,17653080,19147750,22753918,23102728,25157968,25157968,16707764,17653080,19147750,22753918,23102728,25157968,25157968,16707764,17653080,19147750,22753918,23102728,25157968,25157968,16707764,17653080,19147750,22753918,23102728,25157968,25157968,16707764,17653080,19147750,22753918,23102728,25157968,25157968,16707764
,17653080,19147750,22753918,23102728,25157968,25157968;CSQ=T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001726613|promoter_flanking_region||||||||||rs121913465&COSV51768106&COSV51821681&COSV51833841||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|||||,T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00002050858|CTCF_binding_site||||||||||rs121913465&COSV51768106&COSV51821681&COSV51833841||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|||||,T|TF_binding_site_variant|MODIFIER|||MotifFeature|ENSM00796118347|||||||||||rs121913465&COSV51768106&COSV51821681&COSV51833841||1||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|ENSPFM0565|1|N|-0.020|TEAD4::MAX,T|TF_binding_site_variant|MODIFIER|||MotifFeature|ENSM00851445329|||||||||||rs121913465&COSV51768106&COSV51821681&COSV51833841||-1||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|ENSPFM0239|22|N|-0.018|GCM1::MAX,T|TF_binding_site_variant|MODIFIER|||MotifFeature|ENSM00718913717|||||||||||rs121913465&COSV51768106&COSV51821681&COSV51833841||1||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|ENSPFM0402|3|N|-0.002|MYBL1::MAX,T|TF_binding_site_variant|MODIFIER|||MotifF
eature|ENSM00716522705|||||||||||rs121913465&COSV51768106&COSV51821681&COSV51833841||1||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|ENSPFM0401|3|N|-0.014|MYBL1::MAX,T|non_coding_transcript_exon_variant|MODIFIER|EGFR-AS1|ENSG00000224057|Transcript|ENST00000442411|antisense|2/2||||1244/2806|||||rs121913465&COSV51768106&COSV51821681&COSV51833841||-1||SNV|HGNC|40207|YES|||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735|||||,T|missense_variant|MODERATE|EGFR|ENSG00000146648|Transcript|ENST00000275493|protein_coding|20/28||||2480/9821|2303/3633|768/1210|S/I|aGc/aTc|rs121913465&COSV51768106&COSV51821681&COSV51833841||1||SNV|HGNC|3236|YES|||||CCDS5514.1|ENSP00000275493|EGFR_HUMAN|Q75MF2_HUMAN&I3WA73_HUMAN&I3WA72_HUMAN&G9MC81_HUMAN&F1JTL6_HUMAN&E9PFD7_HUMAN&C9JYS6_HUMAN&A7VN06_HUMAN|UPI000003E750||1|deleterious(0)|probably_damaging(0.999)|Superfamily:SSF56112&PIRSF:PIRSF000619&SMART:SM00219&Gene3D:3.30.200.20&Pfam:PF07714&PANTHER:PTHR24416:SF91&PANTHER:PTHR24416&PROSITE_profiles:PS50011|||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&22753918&23102728&19147750&15623594&16205628&16707764&16863509&17653080&19536777&20522446&29695735||||| +7 116423475 . G A . . 
DISEASE=cancer,cancer,cancer,cancer,cancer,cancer,cancer;PMID=17483355,17483355,17483355,17483355,17483355,17483355,17483355;CSQ=A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001408221|promoter_flanking_region||||||||||rs121913676&COSV59268145||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:A:NA|pathogenic&likely_pathogenic|0&1|1&1|17483355&9927037|||||,A|missense_variant|MODERATE|MET|ENSG00000105976|Transcript|ENST00000318493|protein_coding|19/21||||3991/4632|3804/4227|1268/1408|M/I|atG/atA|rs121913676&COSV59268145||1||SNV|HGNC|7029|YES|||||CCDS47689.1|ENSP00000317272|MET_HUMAN|Q9UEJ3_HUMAN&B4DPY6_HUMAN|UPI000014033E||1|deleterious(0)|probably_damaging(0.917)|PROSITE_profiles:PS50011&PANTHER:PTHR24416:SF261&PANTHER:PTHR24416&Gene3D:1.10.510.10&Pfam:PF07714&SMART:SM00219&PIRSF:PIRSF000617&Superfamily:SSF56112&Prints:PR00109|||||||||||||||||||||1KG_ALL:A:NA|pathogenic&likely_pathogenic|0&1|1&1|17483355&9927037||||| +7 128849189 . G C . . DISEASE=basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,basal_cell_carcinoma,medulloblastoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,medulloblastoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,basal_cell_carcinoma,medulloblastoma,medulloblastoma,basal_cell_carcinoma;PMID=19726788,19726788,22679179,22679179,19726788,22679179,19726788,22679179,19726788,19726788,22679179,22679179,25759020,19726788,19726788,19726788,22679179,22679179,2575902
0,19726788,19726788,19726788,22679179,22679179,25759020,19726788,22679179,19726788,19726788,22679179,25759020,19726788,19726788,19726788,22679179,22679179,19726788,22550175,19726788,19726788,22679179,22679179,25759020,19726788,22550175,25759020;CSQ=C|missense_variant|MODERATE|SMO|ENSG00000128602|Transcript|ENST00000249373|protein_coding|8/12||||1697/3738|1417/2364|473/787|D/H|Gac/Cac|rs17710891&COSV50830986&COSV50837084&COSV99976277||1||SNV|HGNC|11119|YES|||||CCDS5811.1|ENSP00000249373|SMO_HUMAN|A4D1K5_HUMAN|UPI0000050447||1|deleterious(0)|probably_damaging(0.991)|PROSITE_profiles:PS50261&PANTHER:PTHR11309&PANTHER:PTHR11309:SF35&Pfam:PF01534|||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&1&1&1|1&1&1&1|24728327&19726788&22679179&25759020|||||,C|downstream_gene_variant|MODIFIER|RP11-286H14.8|ENSG00000243230|Transcript|ENST00000466717|antisense||||||||||rs17710891&COSV50830986&COSV50837084&COSV99976277|427|-1||SNV|Clone_based_vega_gene||YES|||||||||||||||||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&1&1&1|1&1&1&1|24728327&19726788&22679179&25759020||||| +7 140453149 . C T . . 
DISEASE=lung_adenocarcinoma,multiple_myeloma,glioblastoma_multiforme,urinary_bladder_urothelial_carcinoma,lung_adenocarcinoma,multiple_myeloma,glioblastoma_multiforme,urinary_bladder_urothelial_carcinoma,lung_adenocarcinoma,multiple_myeloma,glioblastoma_multiforme,urinary_bladder_urothelial_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=T|missense_variant|MODERATE|BRAF|ENSG00000157764|Transcript|ENST00000288602|protein_coding|15/18||||1847/2480|1786/2301|596/766|G/S|Ggt/Agt|rs121913361&COSV56066561&COSV56185580&COSV56316158&COSV99954791||-1||SNV|HGNC|1097|YES|||||CCDS5863.1|ENSP00000288602|BRAF_HUMAN|Q75MQ8_HUMAN&E5FF37_HUMAN&D7PBN4_HUMAN|UPI000013DF26||1|deleterious(0)|probably_damaging(1)|PROSITE_profiles:PS50011&PANTHER:PTHR23257:SF360&PANTHER:PTHR23257&Pfam:PF07714&Gene3D:1.10.510.10&SMART:SM00220&Superfamily:SSF56112|||||||||||||||||||||1KG_ALL:T:NA|likely_pathogenic|0&1&1&1&1|1&1&1&1&1|24033266&25157968&26619011&12068308&19537845&20350999&21129611&22180495&23093928&22310681&16439621&18413255||||| +7 140453154 . T C . . 
DISEASE=colorectal_cancer,melanoma,melanoma,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,colorectal_cancer,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,melanoma,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,colorectal_cancer,colorectal_cancer,melanoma,melanoma,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,colorectal_cancer,lung_adenocarcinoma,multiple_myeloma,skin_melanoma,brain_glioma,head_and_neck_squamous_cell_carcinoma,colorectal_cancer,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,adrenocortical_carcinoma,colorectal_cancer,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,melanoma,melanoma,colorectal_cancer,melanoma,melanoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,melanoma,colorectal_cancer,lung_adenocarcinoma,multiple_myeloma,skin_melanoma,brain_glioma,head_and_neck_squamous_cell_carcinoma,colorectal_cancer,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,adrenocortical_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,melanoma,melanoma,melanoma,melanoma,colorectal_cancer,multiple_myeloma,skin_melanoma,lung_adenocarcinoma,brain_glioma,head_and_neck_squamous_cell_carcinoma,colorectal_cancer,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,adrenocortical_carcinoma;PMID=12068308,12068308,14679157,18794803,19537845,20350999,20619739,21129611,22310681,1206
8308,19537845,20350999,20619739,21129611,22310681,25157968,25157968,12068308,14679157,18794803,12068308,14679157,18794803,19537845,20350999,20619739,21129611,22310681,12068308,25157968,25157968,22310681,12068308,12068308,14679157,18794803,19537845,20350999,20619739,21129611,25157968,25157968,12068308,12068308,14679157,18794803,19537845,20350999,20619739,21129611,22310681,25157968,25157968,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,12068308,12068308,19537845,20350999,20619739,21129611,22310681,14679157,18794803,25157968,25157968,14679157,18794803,19537845,20350999,20619739,21129611,22310681,12068308,12068308,25157968,25157968,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,12068308,19537845,20350999,20619739,21129611,22310681,12068308,14679157,18794803,25157968,25157968,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=C|missense_variant|MODERATE|BRAF|ENSG00000157764|Transcript|ENST00000288602|protein_coding|15/18||||1842/2480|1781/2301|594/766|D/G|gAt/gGt|rs121913338&COSV56065695&COSV56224199&COSV56283955||-1||SNV|HGNC|1097|YES|||||CCDS5863.1|ENSP00000288602|BRAF_HUMAN|Q75MQ8_HUMAN&E5FF37_HUMAN&D7PBN4_HUMAN|UPI000013DF26||1|deleterious(0)|probably_damaging(0.978)|PROSITE_profiles:PS50011&PANTHER:PTHR23257:SF360&PANTHER:PTHR23257&Pfam:PF07714&Gene3D:1.10.510.10&SMART:SM00220&Superfamily:SSF56112|||||||||||||||||||||1KG_ALL:C:NA|likely_pathogenic&uncertain_significance&pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&26619011&12068308&14679157&15035987&19537845&20350999&20619739&21129611&2872605&12438234&14612909&14678966&14688025&18794803&22310681||||| +7 148506467 . G C . . 
DISEASE=lymphoma,lymphoma,lymphoma,lymphoma,lymphoma,lymphoma,lymphoma;PMID=23023262,23023262,23023262,23023262,23023262,23023262,23023262;CSQ=C|missense_variant|MODERATE|EZH2|ENSG00000106462|Transcript|ENST00000320356|protein_coding|18/20||||2167/2639|2045/2256|682/751|A/G|gCa/gGa|rs1057519833&COSV57447878&COSV57452912||-1||SNV|HGNC|3527|YES|||||CCDS5891.1|ENSP00000320147|EZH2_HUMAN|Q75MQ0_HUMAN&Q6R125_HUMAN|UPI000006D77C||1|deleterious(0)|probably_damaging(0.996)|Superfamily:SSF82199&SMART:SM00317&Pfam:PF00856&Gene3D:2.170.270.10&PANTHER:PTHR22884&PANTHER:PTHR22884:SF287&PROSITE_profiles:PS50280|||||||||||||||||||||1KG_ALL:C:NA|likely_pathogenic|0&1&1|1&1&1|23023262||||| +7 148508728 . A T . . DISEASE=skin_melanoma,non-Hodgkin_lymphoma,skin_melanoma,non-Hodgkin_lymphoma,skin_melanoma,non-Hodgkin_lymphoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011;CSQ=T|missense_variant|MODERATE|EZH2|ENSG00000106462|Transcript|ENST00000320356|protein_coding|16/20||||2058/2639|1936/2256|646/751|Y/N|Tac/Aac|rs267601395&COSV57445793&COSV57445823&COSV57462051||-1||SNV|HGNC|3527|YES|||||CCDS5891.1|ENSP00000320147|EZH2_HUMAN|Q75MQ0_HUMAN&Q6R125_HUMAN|UPI000006D77C||1|deleterious(0.04)|possibly_damaging(0.75)|Superfamily:SSF82199&SMART:SM00317&Pfam:PF00856&Gene3D:2.170.270.10&PANTHER:PTHR22884&PANTHER:PTHR22884:SF287&PROSITE_profiles:PS50280|||||||||||||||||||||1KG_ALL:T:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011&23023262||||| +9 5078361 . G C . . 
DISEASE=acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia,acute_lymphocytic_leukemia;PMID=18805579,19470474,22368270,22955920,19470474,22368270,18805579,22955920,18805579,19470474,22368270,22955920,18805579,19470474,22368270,22955920,18805579,19470474,22368270,22955920,18805579,19470474,22368270,22955920,18805579,19470474,22368270,22955920,18805579,19470474,22368270,22955920;CSQ=C|missense_variant|MODERATE|JAK2|ENSG00000096968|Transcript|ENST00000381652|protein_coding|16/25||||2542/5285|2048/3399|683/1132|R/T|aGa/aCa|rs1057519722&COSV67581394&COSV67592547&COSV67612512||1||SNV|HGNC|6192|YES|||||CCDS6457.1|ENSP00000371067|JAK2_HUMAN|Q8IXP2_HUMAN&F5H5U8_HUMAN|UPI000012DA9E||1|deleterious(0)|probably_damaging(1)|Gene3D:1.10.510.10&Pfam:PF07714&PIRSF:PIRSF000636&PROSITE_profiles:PS50011&PANTHER:PTHR24418&PANTHER:PTHR24418:SF179&SMART:SM00219&Superfamily:SSF56112|||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&1&1&1|1&1&1&1|18805579&19470474&22368270&22955920|||||,C|intron_variant|MODIFIER|AL161450.1|ENSG00000268155|Transcript|ENST00000601793|protein_coding||1/2||||||||rs1057519722&COSV67581394&COSV67592547&COSV67612512||-1||SNV|Clone_based_ensembl_gene||YES||||||ENSP00000470600||M0QZJ9_HUMAN|UPI0001AE
6DC8||||||||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&1&1&1|1&1&1&1|18805579&19470474&22368270&22955920||||| +9 133738364 . A T . . DISEASE=chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_
myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia,chronic_myeloid_leukemia;PMID=11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968,11569495,11853795,12399961,12623848,14745431,19164531,19264234,23676790,24236021,25157968;CSQ=T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000242257|promoter_flanking_region||||||||||rs121913449&COSV59325003||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1|1&1|25157968&11853795&11569495&14745431&24236021&12399961&12623848&23676790&19164531&19264234|||||,T|missense_variant|MODERATE|ABL1|ENSG00000097007|Transcript|ENST00000372348|protein_coding|4/11||||1195/3824|821/3450|274/1149|E/V|gAg/gTg|rs121913449&COSV59325003||1|cds_end_NF|SNV|HGNC|76|YES|||||CCDS35165.1|ENSP00000361423|ABL1_HUMAN|Q59FK4_HUMAN|UPI000013E4DE||1|deleterious(0.01)|probably_damaging(0.949)|Gene3D:3.30.200.20&Pfam:PF07714&PROSITE_patterns:PS00107&PROSITE_profiles:PS50011&PANTHER:PTHR24418&PANTHER:PTHR24418:SF87&SMART:SM00219&Superfamily:SSF55550&Superfamily:SSF56112&Low_complexity_(Seg):seg|||||||||||||||||||||1KG_ALL:T:NA|pathogenic&likely_pathogenic|0&1|1&1|25157968&11853795&11569495&14745431&24236021&12399961&12623848&23676790&19164531&19264234||||| +9 139391212 . G A . . 
DISEASE=non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma,non-small_cell_lung_carcinoma;PMID=20007775,20007775,20007775,20007775,20007775,20007775;CSQ=A|missense_variant|MODERATE|NOTCH1|ENSG00000148400|Transcript|ENST00000277541|protein_coding|34/34||||7055/9371|6979/7668|2327/2555|R/W|Cgg/Tgg|rs751007903&COSV53046255||-1||SNV|HGNC|7881|YES|||||CCDS43905.1|ENSP00000277541|NOTC1_HUMAN|H9CXX2_HUMAN|UPI0000210F68||1|tolerated(0.18)|possibly_damaging(0.794)|PIRSF:PIRSF002279&PANTHER:PTHR24033&PANTHER:PTHR24033:SF37||||||||||2.022e-05|0|2.899e-05|0|0|0|8.965e-06|0|9.806e-05|9.806e-05|gnomAD_SAS|1KG_ALL:A:NA|not_provided&uncertain_significance|0&1|1&1|20007775||||| +10 89717673 . G A . . DISEASE=uterine_corpus_endometrial_carcinoma,glioblastoma_multiforme,gastric_adenocarcinoma,colorectal_cancer,uterine_carcinosarcoma,prostate_adenocarcinoma,cervix_carcinoma,urinary_bladder_urothelial_carcinoma,uterine_corpus_endometrial_carcinoma,glioblastoma_multiforme,gastric_adenocarcinoma,colorectal_cancer,uterine_carcinosarcoma,prostate_adenocarcinoma,cervix_carcinoma,urinary_bladder_urothelial_carcinoma,uterine_corpus_endometrial_carcinoma,glioblastoma_multiforme,gastric_adenocarcinoma,colorectal_cancer,uterine_carcinosarcoma,prostate_adenocarcinoma,cervix_carcinoma,urinary_bladder_urothelial_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|PTEN|ENSG00000171862|Transcript|ENST00000371953|protein_coding|7/9||||2055/9027|698/1212|233/403|R/Q|cGa/cAa|rs770025422&CX1111739&COSV64293089||1||SNV|HGNC|9588|YES|||||CCDS31238.1|ENSP00000361021|PTEN_HUMAN|F6KD01_HUMAN&H6WA51_HUMAN&H6WA46_HUMAN|UPI00001328C5||1|tolerated(0.12)|benign(0.137)|Gene3D:1d5rA02&Pfam:PF10409&PIRSF:PIRSF03802
5&PROSITE_profiles:PS51182&PANTHER:PTHR12305&Superfamily:SSF49562||||||||||3.977e-06|0|0|0|0|0|8.792e-06|0|0|8.792e-06|gnomAD_NFE|1KG_ALL:A:NA|uncertain_significance&likely_pathogenic|0&0&1|1&1&1|26619011||||| +12 25378561 . G A . . DISEASE=colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,thyroid_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,thyroid_cancer,non-small_cell_lung_carcinoma;PMID=16361624,16618717,18316791,19114683,19255327,19679400,19773371
,20921462,20921465,21228335,23406027,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,24836576,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,24836576,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,24836576,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,24836576,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,24836576,16361624,16618717,18316791,19114683,19255327,19679400,19773371,20921462,20921465,21228335,23406027,24836576;CSQ=A|upstream_gene_variant|MODIFIER|AC087239.1|ENSG00000268076|Transcript|ENST00000594112|protein_coding||||||||||rs1057519725&COSV55498939|2954|1||SNV|Clone_based_ensembl_gene||YES||||||ENSP00000472051||M0R1Q5_HUMAN|UPI0001AE6A40||||||||||||||||||||||||||1KG_ALL:A:NA|not_provided&pathogenic|0&1|1&1|16361624&16618717&18316791&19114683&19679400&20921462&20921465&21228335&19255327&19773371&23406027&24836576&26970110&30891959|||||,A|missense_variant|MODERATE|KRAS|ENSG00000133703|Transcript|ENST00000256078|protein_coding|4/6||||501/1119|437/570|146/189|A/V|gCa/gTa|rs1057519725&COSV55498939||-1||SNV|HGNC|6407|YES|||||CCDS8703.1|ENSP00000256078|RASK_HUMAN|Q9UM97_HUMAN&Q71SP6_HUMAN&P78460_HUMAN&L7RSL8_HUMAN&I1SRC5_HUMAN|UPI0000133132||1|deleterious(0.02)|possibly_damaging(0.884)|Gene3D:3.40.50.300&Pfam:PF00071&Prints:PR00449&PROSITE_profiles:PS51421&PANTHER:PTHR24070&PANTHER:PTHR24070:SF186&SMART:SM00173&SMART:SM00174&SMART:SM00175&SMART:SM00176&Superfamily:SSF52540&TIGRFAM:TIGR00231|||||||||||||||||||||1KG_ALL:A:NA|not_provided&pathogenic|0&1|1&1|16361624&16618717&18316791&19114683&19679400&20921462&20921465&21228335&19255327&19773371&23406027&24836576&26970110&30891959||||| +12 25378562 . C G . . 
DISEASE=colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer;PMID=16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335,20921462,20921465,21228335,16361624,16618717,18316791,19114683,19679400,16361624,16618717,18316791,19114683,19679400,20921462,20921465,21228335;CSQ=G|upstream_gene_variant|MODIFIER|AC087239.1|ENSG00000268076|Transcript|ENST00000594112|protein_coding||||||||||rs121913527&COSV55501778&COSV55541748&COSV55727828|2953|1||SNV|Clone_based_ensembl_gene||YES||||||ENSP00000472051||M0R1Q5_HUMAN|UPI0001AE6A40|||
|||||||||||||||||||||||1KG_ALL:G:NA|likely_pathogenic&pathogenic|0&1&1&1|1&1&1&1|25157968&16361624&16618717&18316791&19114683&19679400&20921462&20921465&21228335&21398618&29762787&25251940&26970110&30891959|||||,G|missense_variant|MODERATE|KRAS|ENSG00000133703|Transcript|ENST00000256078|protein_coding|4/6||||500/1119|436/570|146/189|A/P|Gca/Cca|rs121913527&COSV55501778&COSV55541748&COSV55727828||-1||SNV|HGNC|6407|YES|||||CCDS8703.1|ENSP00000256078|RASK_HUMAN|Q9UM97_HUMAN&Q71SP6_HUMAN&P78460_HUMAN&L7RSL8_HUMAN&I1SRC5_HUMAN|UPI0000133132||1|deleterious(0)|probably_damaging(1)|Gene3D:3.40.50.300&Pfam:PF00071&Prints:PR00449&PROSITE_profiles:PS51421&PANTHER:PTHR24070&PANTHER:PTHR24070:SF186&SMART:SM00173&SMART:SM00174&SMART:SM00175&SMART:SM00176&Superfamily:SSF52540&TIGRFAM:TIGR00231|||||||||||||||||||||1KG_ALL:G:NA|likely_pathogenic&pathogenic|0&1&1&1|1&1&1&1|25157968&16361624&16618717&18316791&19114683&19679400&20921462&20921465&21228335&21398618&29762787&25251940&26970110&30891959||||| +12 25398284 . C G . . 
DISEASE=gastrointestinal_stromal_tumor,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,gastrointestinal_stromal_tumor,gastrointestinal_stromal_tumor,colorectal_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,lung_adenocarcinoma,gastrointestinal_stromal_tumor,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,lung_adenocarcinoma,gastrointestinal_stromal_tumor,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,lung_adenocarcinoma,gastrointestinal_stromal_tumor,colorectal_cancer,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovar
ian_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,lung_adenocarcinoma,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,lung_adenocarcinoma,gastrointestinal_stromal_tumor,non-small_cell_lung_carcinoma,colorectal_cancer,colorectal_cancer,colorectal_cancer,non-small_cell_lung_carcinoma,ovarian_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,colorectal_cancer,ovarian_cancer,colorectal_cancer,gastrointestinal_stromal_tumor,colorectal_cancer,lung_adenocarcinoma;PMID=22282465,23182985,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,23182985,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,22282465,25157968,25157968,22282465,23182985,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,25157968,25157968,19794967,22282465,23182985,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,25157968,25157968,19794967,22282465,23182985,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,22282465,23182985,25157968,25157968,19794967,22282465,23182985,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,25157968,25157968,19794967,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,25157968,25157968,19794967,22282465,12460918,16361624,16618717,18316791,18794081,19018267,19114683,19679400,20921462,20921465,21228335,21975775,23182985,25157968,25157968,19794967;CSQ=G|missense_variant|MODERATE|KRAS|ENSG00000133703|Transcript|ENST0000
0256078|protein_coding|2/6||||99/1119|35/570|12/189|G/A|gGt/gCt|rs121913529&COSV55497369&COSV55497419&COSV55497479||-1||SNV|HGNC|6407|YES|||||CCDS8703.1|ENSP00000256078|RASK_HUMAN|Q9UM97_HUMAN&Q71SP6_HUMAN&P78460_HUMAN&L7RSL8_HUMAN&I1SRC5_HUMAN|UPI0000133132||1|deleterious(0.02)|possibly_damaging(0.723)|Gene3D:3.40.50.300&Pfam:PF00071&Prints:PR00449&PROSITE_profiles:PS51421&PANTHER:PTHR24070&PANTHER:PTHR24070:SF186&SMART:SM00173&SMART:SM00174&SMART:SM00175&SMART:SM00176&Superfamily:SSF52540&TIGRFAM:TIGR00231&Low_complexity_(Seg):seg||||||||||0|0|0|0|0|0|0|0|0|0|gnomAD_AFR&gnomAD_AMR&gnomAD_ASJ&gnomAD_EAS&gnomAD_FIN&gnomAD_NFE&gnomAD_OTH&gnomAD_SAS|1KG_ALL:G:NA|likely_pathogenic¬_provided&pathogenic|0&1&1&1|1&1&1&1|25157968&12460918&19018267&21975775&17704260&2278970&3122217&15696205&16361624&16434492&16618717&18316791&18794081&19075190&19114683&19679400&20921462&20921465&21228335&22407852&17332249&17384584&19255327&19773371&19794967&21398618&23182985&23406027&19047918&7773929&8439212&15842656&17910045&19029981&19358724&19881948&20609353&20805368&20949522&21079152&21169357&22025163&22235099&22282465&22499344&22683711&22897852&23014527&25044103&26372703&27872090&29525983&31949278&29721857&30463544&29298116||||| +12 133253184 . G C . . 
DISEASE=uterine_corpus_endometrial_carcinoma,colorectal_cancer,uterine_carcinosarcoma,pancreas_adenocarcinoma,breast_cancer,uterine_corpus_endometrial_carcinoma,colorectal_cancer,uterine_carcinosarcoma,pancreas_adenocarcinoma,breast_cancer,uterine_corpus_endometrial_carcinoma,colorectal_cancer,uterine_carcinosarcoma,pancreas_adenocarcinoma,breast_cancer;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=C|missense_variant|MODERATE|POLE|ENSG00000177084|Transcript|ENST00000320574|protein_coding|9/49||||901/7840|857/6861|286/2286|P/R|cCt/cGt|rs1057519943&COSV57673247&COSV57678855||-1||SNV|HGNC|9177|YES|||||CCDS9278.1|ENSP00000322570|DPOE1_HUMAN|Q9UNE8_HUMAN&Q96IE1_HUMAN&Q8WU23_HUMAN&F5H7H6_HUMAN&F5H5Q5_HUMAN&F5H3W5_HUMAN&F5H0H8_HUMAN&D3DXI9_HUMAN|UPI00001FBF97||1|deleterious(0)|probably_damaging(1)|PANTHER:PTHR10670&PANTHER:PTHR10670:SF0&Pfam:PF03104&Gene3D:3.30.420.10&SMART:SM00486&Superfamily:SSF53098|||||||||||||||||||||1KG_ALL:C:NA|drug_response&likely_pathogenic&uncertain_significance|0&1&1|1&1&1|26619011||||| +12 133253185 . G A . . 
DISEASE=uterine_corpus_endometrial_carcinoma,colorectal_cancer,uterine_carcinosarcoma,pancreas_adenocarcinoma,breast_cancer,uterine_corpus_endometrial_carcinoma,colorectal_cancer,uterine_carcinosarcoma,pancreas_adenocarcinoma,breast_cancer,uterine_corpus_endometrial_carcinoma,colorectal_cancer,uterine_carcinosarcoma,pancreas_adenocarcinoma,breast_cancer;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|POLE|ENSG00000177084|Transcript|ENST00000320574|protein_coding|9/49||||900/7840|856/6861|286/2286|P/S|Cct/Tct|rs1057519944&COSV57675830&COSV57680152||-1||SNV|HGNC|9177|YES|||||CCDS9278.1|ENSP00000322570|DPOE1_HUMAN|Q9UNE8_HUMAN&Q96IE1_HUMAN&Q8WU23_HUMAN&F5H7H6_HUMAN&F5H5Q5_HUMAN&F5H3W5_HUMAN&F5H0H8_HUMAN&D3DXI9_HUMAN|UPI00001FBF97||1|deleterious(0)|probably_damaging(0.998)|PANTHER:PTHR10670&PANTHER:PTHR10670:SF0&Pfam:PF03104&Gene3D:3.30.420.10&SMART:SM00486&Superfamily:SSF53098|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1|1&1&1|26619011||||| +13 28592640 . A C . . DISEASE=acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia;PMID=26619011,26619011,26619011;CSQ=C|missense_variant|MODERATE|FLT3|ENSG00000122025|Transcript|ENST00000241453|protein_coding|20/24||||2587/3842|2505/2982|835/993|D/E|gaT/gaG|rs121913487&COSV54044297&COSV54045054||-1||SNV|HGNC|3765|YES|||||CCDS31953.1|ENSP00000241453|FLT3_HUMAN||UPI00001FC90B||1|deleterious(0)|probably_damaging(0.969)|Superfamily:SSF56112&SMART:SM00219&PIRSF:PIRSF000615&Pfam:PF07714&Gene3D:1.10.510.10&PANTHER:PTHR24416:SF278&PANTHER:PTHR24416&PROSITE_profiles:PS50011|||||||||||||||||||||1KG_ALL:C:NA|pathogenic|0&1&1|1&1&1|25157968&22368270&19657110&11290608&15256420&16857985&20733134&22504183&22504184&23261068&23321257&23430109&23714533&23783394&15374878||||| +14 105243048 . G T . . 
DISEASE=melanoma,melanoma,melanoma,melanoma,melanoma,melanoma,melanoma;PMID=24265152,24265152,24265152,24265152,24265152,24265152,24265152;CSQ=T|missense_variant|MODERATE|AKT1|ENSG00000142208|Transcript|ENST00000554581|protein_coding|3/13||||1716/3916|235/1443|79/480|Q/K|Cag/Aag|rs1057519804&COSV62571663||-1||SNV|HGNC|391|YES|||||CCDS9994.1|ENSP00000451828|AKT1_HUMAN|B0LPE5_HUMAN&Q9BV07_HUMAN&G3V4I6_HUMAN&G3V3K5_HUMAN&B7Z5R1_HUMAN&B3KXD7_HUMAN|UPI0000070813||1|deleterious(0.02)|probably_damaging(0.967)|PROSITE_profiles:PS50003&PANTHER:PTHR24352&PANTHER:PTHR24352:SF30&Gene3D:2.30.29.30&Pfam:PF00169&SMART:SM00233&Superfamily:SSF50729|||||||||||||||||||||1KG_ALL:T:NA|likely_pathogenic|0&1|1&1|24265152||||| +14 105246551 . C T . . DISEASE=breast_cancer,breast_cancer,melanoma,breast_cancer,breast_cancer,melanoma,breast_cancer,breast_cancer,melanoma,breast_cancer,breast_cancer,melanoma,breast_cancer,breast_cancer,melanoma,breast_cancer,breast_cancer,melanoma;PMID=23888070,26351323,24735930,23888070,26351323,24735930,23888070,26351323,24735930,23888070,26351323,24735930,23888070,26351323,24735930,23888070,26351323,24735930;CSQ=T|missense_variant&splice_region_variant|MODERATE|AKT1|ENSG00000142208|Transcript|ENST00000554581|protein_coding|2/13||||1530/3916|49/1443|17/480|E/K|Gag/Aag|rs121434592&CM1611160&COSV62571334&COSV62576849||-1||SNV|HGNC|391|YES|||||CCDS9994.1|ENSP00000451828|AKT1_HUMAN|B0LPE5_HUMAN&Q9BV07_HUMAN&G3V4I6_HUMAN&G3V3K5_HUMAN&B7Z5R1_HUMAN&B3KXD7_HUMAN|UPI0000070813||1|deleterious(0)|probably_damaging(1)|PROSITE_profiles:PS50003&PANTHER:PTHR24352&PANTHER:PTHR24352:SF30&Gene3D:2.30.29.30&Pfam:PF00169&SMART:SM00233&Superfamily:SSF50729||||||||||4e-06|0|0|0|0|0|8.872e-06|0|0|8.872e-06|gnomAD_NFE|1KG_ALL:T:NA|uncertain_significance&pathogenic&likely_pathogenic|0&0&1&1|1&1&1&1|30781715&25157968&26619011&22538770&20233444&23700467&21512767&20453058&22980975&30065942&17611497&18504432&18611285&19418217&19487299&19853286&21464312&21793738&22610119&22722201&22722839
&22876373&23237847&23348505&23728071&23934607&24190505&24657128&29114575&30883028&28569218&32014856||||| +15 90631839 . T A . . DISEASE=brain_glioma,acute_myeloid_leukemia,hepatocellular_carcinoma,brain_stem_glioma,colorectal_cancer,brain_glioma,acute_myeloid_leukemia,hepatocellular_carcinoma,brain_stem_glioma,colorectal_cancer,brain_glioma,acute_myeloid_leukemia,hepatocellular_carcinoma,brain_stem_glioma,colorectal_cancer;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|IDH2|ENSG00000182054|Transcript|ENST00000330062|protein_coding|4/11||||628/2694|514/1359|172/452|R/W|Agg/Tgg|rs1057519906&COSV57468942&COSV57468989&COSV57477941||-1||SNV|HGNC|5383|YES|||||CCDS10359.1|ENSP00000331897|IDHP_HUMAN|H0YLL5_HUMAN&B4DSZ6_HUMAN&B4DFL2_HUMAN|UPI000012D1C3||1|deleterious_low_confidence(0)|probably_damaging(1)|PANTHER:PTHR11822&Pfam:PF00180&TIGRFAM:TIGR00127&Gene3D:3.40.718.10&PIRSF:PIRSF000108&Superfamily:SSF53659|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011||||| +16 3788617 . C A . . 
DISEASE=head_and_neck_squamous_cell_carcinoma,medulloblastoma,hepatocellular_carcinoma,urinary_bladder_urothelial_carcinoma,gastric_adenocarcinoma,skin_melanoma,lung_squamous_cell_carcinoma,glioblastoma_multiforme,colorectal_cancer,cervix_carcinoma,salivary_gland_adenoid_cystic_carcinoma,head_and_neck_squamous_cell_carcinoma,medulloblastoma,hepatocellular_carcinoma,urinary_bladder_urothelial_carcinoma,gastric_adenocarcinoma,skin_melanoma,lung_squamous_cell_carcinoma,glioblastoma_multiforme,colorectal_cancer,cervix_carcinoma,salivary_gland_adenoid_cystic_carcinoma,head_and_neck_squamous_cell_carcinoma,medulloblastoma,hepatocellular_carcinoma,urinary_bladder_urothelial_carcinoma,gastric_adenocarcinoma,skin_melanoma,lung_squamous_cell_carcinoma,glioblastoma_multiforme,colorectal_cancer,cervix_carcinoma,salivary_gland_adenoid_cystic_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|CREBBP|ENSG00000005339|Transcript|ENST00000262367|protein_coding|26/31||||5147/10803|4337/7329|1446/2442|R/L|cGc/cTc|rs1057519884&COSV52112787&COSV52125401&COSV52132241||-1||SNV|HGNC|2348|YES|||||CCDS10509.1|ENSP00000262367|CBP_HUMAN|Q75MY6_HUMAN&I3L3I5_HUMAN&B5A253_HUMAN&B5A252_HUMAN&B5A250_HUMAN&B5A246_HUMAN&B5A244_HUMAN&B5A243_HUMAN&B5A242_HUMAN&B5A240_HUMAN&B5A239_HUMAN&B5A235_HUMAN&B5A231_HUMAN&B5A227_HUMAN&B5A226_HUMAN&B5A222_HUMAN&B5A221_HUMAN&B5A219_HUMAN&B5A218_HUMAN&B5A216_HUMAN&B5A215_HUMAN&B5A214_HUMAN&B5A212_HUMAN|UPI0000000620||1|deleterious(0)|probably_damaging(0.995)|PANTHER:PTHR13808&PANTHER:PTHR13808:SF5&Pfam:PF08214|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011||||| +16 3788617 . C T . . 
DISEASE=head_and_neck_squamous_cell_carcinoma,medulloblastoma,hepatocellular_carcinoma,urinary_bladder_urothelial_carcinoma,gastric_adenocarcinoma,skin_melanoma,lung_squamous_cell_carcinoma,glioblastoma_multiforme,colorectal_cancer,cervix_carcinoma,salivary_gland_adenoid_cystic_carcinoma,head_and_neck_squamous_cell_carcinoma,medulloblastoma,hepatocellular_carcinoma,urinary_bladder_urothelial_carcinoma,gastric_adenocarcinoma,skin_melanoma,lung_squamous_cell_carcinoma,glioblastoma_multiforme,colorectal_cancer,cervix_carcinoma,salivary_gland_adenoid_cystic_carcinoma,head_and_neck_squamous_cell_carcinoma,medulloblastoma,hepatocellular_carcinoma,urinary_bladder_urothelial_carcinoma,gastric_adenocarcinoma,skin_melanoma,lung_squamous_cell_carcinoma,glioblastoma_multiforme,colorectal_cancer,cervix_carcinoma,salivary_gland_adenoid_cystic_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=T|missense_variant|MODERATE|CREBBP|ENSG00000005339|Transcript|ENST00000262367|protein_coding|26/31||||5147/10803|4337/7329|1446/2442|R/H|cGc/cAc|rs1057519884&COSV52112787&COSV52125401&COSV52132241||-1||SNV|HGNC|2348|YES|||||CCDS10509.1|ENSP00000262367|CBP_HUMAN|Q75MY6_HUMAN&I3L3I5_HUMAN&B5A253_HUMAN&B5A252_HUMAN&B5A250_HUMAN&B5A246_HUMAN&B5A244_HUMAN&B5A243_HUMAN&B5A242_HUMAN&B5A240_HUMAN&B5A239_HUMAN&B5A235_HUMAN&B5A231_HUMAN&B5A227_HUMAN&B5A226_HUMAN&B5A222_HUMAN&B5A221_HUMAN&B5A219_HUMAN&B5A218_HUMAN&B5A216_HUMAN&B5A215_HUMAN&B5A214_HUMAN&B5A212_HUMAN|UPI0000000620||1|deleterious(0)|probably_damaging(0.996)|PANTHER:PTHR13808&PANTHER:PTHR13808:SF5&Pfam:PF08214|||||||||||||||||||||1KG_ALL:T:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011||||| +17 7577081 . T G . . 
DISEASE=lung_adenocarcinoma,breast_cancer,gastric_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,skin_melanoma,lung_small_cell_carcinoma,hepatocellular_carcinoma,skin_squamous_cell_carcinoma,colorectal_cancer,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,brain_glioma,acute_myeloid_leukemia,esophageal_carcinoma,urinary_bladder_urothelial_carcinoma,lung_adenocarcinoma,breast_cancer,gastric_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,skin_melanoma,lung_small_cell_carcinoma,hepatocellular_carcinoma,skin_squamous_cell_carcinoma,colorectal_cancer,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,brain_glioma,acute_myeloid_leukemia,esophageal_carcinoma,urinary_bladder_urothelial_carcinoma,lung_adenocarcinoma,breast_cancer,gastric_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,skin_melanoma,lung_small_cell_carcinoma,hepatocellular_carcinoma,skin_squamous_cell_carcinoma,colorectal_cancer,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,brain_glioma,acute_myeloid_leukemia,esophageal_carcinoma,urinary_bladder_urothelial_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=G|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001591779|promoter_flanking_region||||||||||rs1057519985&CM920679&COSV52661405&COSV52748431&COSV53044704||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:G:NA|likely_pathogenic|0&0&1&1&1|1&1&1&1&1|26619011|||||,G|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001888292|CTCF_binding_site||||||||||rs1057519985&CM920679&COSV52661405&COSV52748431&COSV53044704||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:G:NA|likely
_pathogenic|0&0&1&1&1|1&1&1&1&1|26619011|||||,G|missense_variant|MODERATE|TP53|ENSG00000141510|Transcript|ENST00000269305|protein_coding|8/11||||1047/2579|857/1182|286/393|E/A|gAa/gCa|rs1057519985&CM920679&COSV52661405&COSV52748431&COSV53044704||-1||SNV|HGNC|11998|YES|||||CCDS11118.1|ENSP00000269305|P53_HUMAN|S5LQU8_HUMAN&Q761V2_HUMAN&Q6IT77_HUMAN&Q1HGV1_HUMAN&Q0PKT5_HUMAN&L0ES54_HUMAN&L0EQ05_HUMAN&K7PPA8_HUMAN&H2EHT1_HUMAN&G4Y083_HUMAN&E9PCY9_HUMAN&E7ESS1_HUMAN&E7EMR6_HUMAN&B5AKF6_HUMAN&B4DNI2_HUMAN&A4GWD0_HUMAN&A4GWB8_HUMAN&A4GWB5_HUMAN&A4GW97_HUMAN&A4GW76_HUMAN&A4GW75_HUMAN&A4GW74_HUMAN&A4GW67_HUMAN&A2I9Z1_HUMAN&A2I9Z0_HUMAN|UPI000002ED67||1|deleterious(0)|probably_damaging(0.989)|Gene3D:2.60.40.720&Pfam:PF00870&Prints:PR00386&PANTHER:PTHR11447&PANTHER:PTHR11447:SF6&Superfamily:SSF49417|||||||||||||||||||||1KG_ALL:G:NA|likely_pathogenic|0&0&1&1&1|1&1&1&1&1|26619011||||| +17 7577096 . T A . . DISEASE=lung_adenocarcinoma,ovarian_serous_cystadenocarcinoma,glioblastoma_multiforme,breast_cancer,uterine_carcinosarcoma,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,skin_melanoma,pancreas_adenocarcinoma,neuroblastoma,multiple_myeloma,lung_squamous_cell_carcinoma,hepatocellular_carcinoma,renal_clear_cell_carcinoma,head_and_neck_squamous_cell_carcinoma,skin_squamous_cell_carcinoma,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,lung_adenocarcinoma,ovarian_serous_cystadenocarcinoma,glioblastoma_multiforme,breast_cancer,uterine_carcinosarcoma,uterine_corpus_endometrial_carcinoma,gastric_adenocarcinoma,skin_melanoma,pancreas_adenocarcinoma,neuroblastoma,multiple_myeloma,lung_squamous_cell_carcinoma,hepatocellular_carcinoma,renal_clear_cell_carcinoma,head_and_neck_squamous_cell_carcinoma,skin_squamous_cell_carcinoma,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,lung_adenocarcinoma,ovarian_serous_cystadenocarcinoma,glioblastoma_multiforme,breast_cancer,uterine_carcinosarcoma,uterine_corpus_endometrial_carcinoma,gastric_adenocar
cinoma,skin_melanoma,pancreas_adenocarcinoma,neuroblastoma,multiple_myeloma,lung_squamous_cell_carcinoma,hepatocellular_carcinoma,renal_clear_cell_carcinoma,head_and_neck_squamous_cell_carcinoma,skin_squamous_cell_carcinoma,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001591779|promoter_flanking_region||||||||||rs587781525&CM004343&CM056068&CM114008&COSV52694391&COSV52729448&COSV52815868||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:A:NA|pathogenic&likely_pathogenic|0&0&0&0&1&1&1|1&1&1&1&1&1&1|26619011&30871634|||||,A|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001888292|CTCF_binding_site||||||||||rs587781525&CM004343&CM056068&CM114008&COSV52694391&COSV52729448&COSV52815868||||SNV||||||||||||||||||||||||||||||||||||||1KG_ALL:A:NA|pathogenic&likely_pathogenic|0&0&0&0&1&1&1|1&1&1&1&1&1&1|26619011&30871634|||||,A|missense_variant|MODERATE|TP53|ENSG00000141510|Transcript|ENST00000269305|protein_coding|8/11||||1032/2579|842/1182|281/393|D/V|gAc/gTc|rs587781525&CM004343&CM056068&CM114008&COSV52694391&COSV52729448&COSV52815868||-1||SNV|HGNC|11998|YES|||||CCDS11118.1|ENSP00000269305|P53_HUMAN|S5LQU8_HUMAN&Q761V2_HUMAN&Q6IT77_HUMAN&Q1HGV1_HUMAN&Q0PKT5_HUMAN&L0ES54_HUMAN&L0EQ05_HUMAN&K7PPA8_HUMAN&H2EHT1_HUMAN&G4Y083_HUMAN&E9PCY9_HUMAN&E7ESS1_HUMAN&E7EMR6_HUMAN&B5AKF6_HUMAN&B4DNI2_HUMAN&A4GWD0_HUMAN&A4GWB8_HUMAN&A4GWB5_HUMAN&A4GW97_HUMAN&A4GW76_HUMAN&A4GW75_HUMAN&A4GW74_HUMAN&A4GW67_HUM
AN&A2I9Z1_HUMAN&A2I9Z0_HUMAN|UPI000002ED67||1|deleterious(0)|probably_damaging(0.987)|Gene3D:2.60.40.720&Pfam:PF00870&Prints:PR00386&PANTHER:PTHR11447&PANTHER:PTHR11447:SF6&Superfamily:SSF49417|||||||||||||||||||||1KG_ALL:A:NA|pathogenic&likely_pathogenic|0&0&0&0&1&1&1|1&1&1&1&1&1&1|26619011&30871634||||| +17 7577120 . C T . . DISEASE=acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,cancer,cancer,breast_cancer,breast_cancer,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,cancer,breast_cancer,breast_cancer,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,cancer,breast_cancer,breast_cancer,brain_glioma,colorectal_cancer,ovarian_serous_cystadenocarcinoma,breast_cancer,gastric_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,lung_adenocarcinoma,esophageal_carcinoma,pancreas_adenocarcinoma,uterine_corpus_endometrial_carcinoma,glioblastoma_multiforme,lung_squamous_cell_carcinoma,uterine_carcinosarcoma,prostate_adenocarcinoma,brain_stem_glioma,lung_small_cell_carcinoma,hepatocellular_carcinoma,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,skin_melanoma,multiple_myeloma,medulloblastoma,acute_myeloid_leukemia,adrenocortical_carcinoma,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,cancer,breast_cancer,breast_cancer,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,cancer,breast_cancer,breast_cancer,brain_glioma,colorectal_cancer,ovarian_serous_cystadenocarcinoma,breast_cancer,gastric_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,lung_adenocarcinoma,esophageal_carcinoma,pancreas_adenocarcinoma,uterine_corpus_endometrial_carcinoma,glioblastoma_multiforme,lung_squamous_cell_carcinoma,uterine_carcinosarcoma,prostate_adenocarcinoma,brain_stem_glioma,lung_small_cell_carcinoma,hepatocellular_carcinoma,chronic_lymphoc
ytic_leukemia,urinary_bladder_urothelial_carcinoma,skin_melanoma,multiple_myeloma,medulloblastoma,acute_myeloid_leukemia,adrenocortical_carcinoma,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,cancer,breast_cancer,brain_glioma,colorectal_cancer,ovarian_serous_cystadenocarcinoma,breast_cancer,gastric_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,lung_adenocarcinoma,esophageal_carcinoma,pancreas_adenocarcinoma,uterine_corpus_endometrial_carcinoma,glioblastoma_multiforme,lung_squamous_cell_carcinoma,uterine_carcinosarcoma,prostate_adenocarcinoma,brain_stem_glioma,lung_small_cell_carcinoma,hepatocellular_carcinoma,chronic_lymphocytic_leukemia,urinary_bladder_urothelial_carcinoma,skin_melanoma,multiple_myeloma,medulloblastoma,acute_myeloid_leukemia,adrenocortical_carcinoma,breast_cancer;PMID=24487413,24641375,24381225,24487413,24641375,24381225,24487413,24381225,24641375,25157968,25157968,16489069,9569050,24381225,24487413,24641375,25157968,16489069,9569050,24381225,24487413,24641375,25157968,16489069,9569050,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,24381225,24487413,24641375,25157968,16489069,9569050,24381225,24487413,24641375,25157968,16489069,9569050,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,24381225,24487413,24641375,25157968,16489069,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,9569050;CSQ=T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001591779|promoter_flanking_region||||||||||rs28934576&CM004342&CM010472&CM92
0677&COSV52660980&COSV52664805&COSV52676050&COSV52728930||||SNV|||||||||||||||||||0.0002|0|0|0|0.001|0|||1.593e-05|0|0|9.929e-05|0|0|2.643e-05|0|0|0.001|EUR|1KG_ALL:T:0.0002|pathogenic/likely_pathogenic&pathogenic&likely_pathogenic|0&0&0&0&1&1&1&1|1&1&1&1&1&1&1&1|25157968&26619011&26900293&32039725&25105660&9569050&16489069&17606709&18511570&23161690&24381225&24487413&24641375&26716509&1565144&15951970&8423216&15390294&28854261&31882575&26884312|||||,T|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001888292|CTCF_binding_site||||||||||rs28934576&CM004342&CM010472&CM920677&COSV52660980&COSV52664805&COSV52676050&COSV52728930||||SNV|||||||||||||||||||0.0002|0|0|0|0.001|0|||1.593e-05|0|0|9.929e-05|0|0|2.643e-05|0|0|0.001|EUR|1KG_ALL:T:0.0002|pathogenic/likely_pathogenic&pathogenic&likely_pathogenic|0&0&0&0&1&1&1&1|1&1&1&1&1&1&1&1|25157968&26619011&26900293&32039725&25105660&9569050&16489069&17606709&18511570&23161690&24381225&24487413&24641375&26716509&1565144&15951970&8423216&15390294&28854261&31882575&26884312|||||,T|missense_variant|MODERATE|TP53|ENSG00000141510|Transcript|ENST00000269305|protein_coding|8/11||||1008/2579|818/1182|273/393|R/H|cGt/cAt|rs28934576&CM004342&CM010472&CM920677&COSV52660980&COSV52664805&COSV52676050&COSV52728930||-1||SNV|HGNC|11998|YES|||||CCDS11118.1|ENSP00000269305|P53_HUMAN|S5LQU8_HUMAN&Q761V2_HUMAN&Q6IT77_HUMAN&Q1HGV1_HUMAN&Q0PKT5_HUMAN&L0ES54_HUMAN&L0EQ05_HUMAN&K7PPA8_HUMAN&H2EHT1_HUMAN&G4Y083_HUMAN&E9PCY9_HUMAN&E7ESS1_HUMAN&E7EMR6_HUMAN&B5AKF6_HUMAN&B4DNI2_HUMAN&A4GWD0_HUMAN&A4GWB8_HUMAN&A4GWB5_HUMAN&A4GW97_HUMAN&A4GW76_HUMAN&A4GW75_HUMAN&A4GW74_HUMAN&A4GW67_HUMAN&A2I9Z1_HUMAN&A2I9Z0_HUMAN|UPI000002ED67||1|tolerated(0.13)|possibly_damaging(0.643)|Gene3D:2.60.40.720&Pfam:PF00870&Prints:PR00386&PANTHER:PTHR11447&PANTHER:PTHR11447:SF6&Superfamily:SSF49417||0.0002|0|0|0|0.001|0|||1.593e-05|0|0|9.929e-05|0|0|2.643e-05|0|0|0.001|EUR|1KG_ALL:T:0.0002|pathogenic/likely_pathogenic&pathogenic&likely_pathogenic|0&0&0&0&1&1&1&1|1&1&1
&1&1&1&1&1|25157968&26619011&26900293&32039725&25105660&9569050&16489069&17606709&18511570&23161690&24381225&24487413&24641375&26716509&1565144&15951970&8423216&15390294&28854261&31882575&26884312||||| +17 7577518 . T A . . DISEASE=esophageal_carcinoma,breast_cancer,pancreas_adenocarcinoma,lung_adenocarcinoma,brain_glioma,glioblastoma_multiforme,chronic_lymphocytic_leukemia,esophageal_carcinoma,breast_cancer,pancreas_adenocarcinoma,lung_adenocarcinoma,brain_glioma,glioblastoma_multiforme,chronic_lymphocytic_leukemia,esophageal_carcinoma,breast_cancer,pancreas_adenocarcinoma,lung_adenocarcinoma,brain_glioma,glioblastoma_multiforme,chronic_lymphocytic_leukemia;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|TP53|ENSG00000141510|Transcript|ENST00000269305|protein_coding|7/11||||953/2579|763/1182|255/393|I/F|Atc/Ttc|rs1057519995&COSV52663197&COSV52713501&COSV52816344||-1||SNV|HGNC|11998|YES|||||CCDS11118.1|ENSP00000269305|P53_HUMAN|S5LQU8_HUMAN&Q761V2_HUMAN&Q6IT77_HUMAN&Q1HGV1_HUMAN&Q0PKT5_HUMAN&L0ES54_HUMAN&L0EQ05_HUMAN&K7PPA8_HUMAN&H2EHT1_HUMAN&G4Y083_HUMAN&E9PCY9_HUMAN&E7ESS1_HUMAN&E7EMR6_HUMAN&B5AKF6_HUMAN&B4DNI2_HUMAN&A4GWD0_HUMAN&A4GWB8_HUMAN&A4GWB5_HUMAN&A4GW97_HUMAN&A4GW76_HUMAN&A4GW75_HUMAN&A4GW74_HUMAN&A4GW67_HUMAN&A2I9Z1_HUMAN&A2I9Z0_HUMAN|UPI000002ED67||1|deleterious(0)|probably_damaging(0.984)|Gene3D:2.60.40.720&Pfam:PF00870&Prints:PR00386&PANTHER:PTHR11447&PANTHER:PTHR11447:SF6&Superfamily:SSF49417|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011||||| +17 7578235 . T A . . 
DISEASE=lung_adenocarcinoma,hepatocellular_carcinoma,colorectal_cancer,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,head_and_neck_squamous_cell_carcinoma,esophageal_carcinoma,breast_cancer,uterine_carcinosarcoma,uterine_corpus_endometrial_carcinoma,multiple_myeloma,non-Hodgkin_lymphoma,lung_squamous_cell_carcinoma,brain_glioma,chromophobe_adenocarcinoma,glioblastoma_multiforme,lung_adenocarcinoma,hepatocellular_carcinoma,colorectal_cancer,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,head_and_neck_squamous_cell_carcinoma,esophageal_carcinoma,breast_cancer,uterine_carcinosarcoma,uterine_corpus_endometrial_carcinoma,multiple_myeloma,non-Hodgkin_lymphoma,lung_squamous_cell_carcinoma,brain_glioma,chromophobe_adenocarcinoma,glioblastoma_multiforme,lung_adenocarcinoma,hepatocellular_carcinoma,colorectal_cancer,pancreas_adenocarcinoma,ovarian_serous_cystadenocarcinoma,head_and_neck_squamous_cell_carcinoma,esophageal_carcinoma,breast_cancer,uterine_carcinosarcoma,uterine_corpus_endometrial_carcinoma,multiple_myeloma,non-Hodgkin_lymphoma,lung_squamous_cell_carcinoma,brain_glioma,chromophobe_adenocarcinoma,glioblastoma_multiforme;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=A|missense_variant|MODERATE|TP53|ENSG00000141510|Transcript|ENST00000269305|protein_coding|6/11||||804/2579|614/1182|205/393|Y/F|tAt/tTt|rs1057520007&COSV52665440&COSV52677268&COSV52688506||-1||SNV|HGNC|11998|YES|||||CCDS11118.1|ENSP00000269305|P53_HUMAN|S5LQU8_HUMAN&Q761V2_HUMAN&Q6IT77_HUMAN&Q1HGV1_HUMAN&Q0PKT5_HUMAN&L0ES54_HUMAN&L0EQ05_HUMAN&K7PPA8_HUMAN&H2EHT1_HUMAN&G4Y083_HUMAN&E9PCY9_HUMA
N&E7ESS1_HUMAN&E7EMR6_HUMAN&B5AKF6_HUMAN&B4DNI2_HUMAN&A4GWD0_HUMAN&A4GWB8_HUMAN&A4GWB5_HUMAN&A4GW97_HUMAN&A4GW76_HUMAN&A4GW75_HUMAN&A4GW74_HUMAN&A4GW67_HUMAN&A2I9Z1_HUMAN&A2I9Z0_HUMAN|UPI000002ED67||1|deleterious(0)|probably_damaging(0.963)|Gene3D:2.60.40.720&Pfam:PF00870&PANTHER:PTHR11447&PANTHER:PTHR11447:SF6&Superfamily:SSF49417|||||||||||||||||||||1KG_ALL:A:NA|likely_pathogenic|0&1&1&1|1&1&1&1|26619011||||| +17 37881000 . G T . . DISEASE=breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,breast_cancer,gastric_adenocarcinoma,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,breast_cancer,gastric_adenocarcinoma,colon_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,breast_cancer,colorectal_cancer,breast_cancer,gastric_adenocarcinoma,colon_cancer,breast_cancer;PMID=16397024,23220880,22046346,24516025,22046346,24516025,23220880,16397024,25157968,16397024,22046346,24516025,25157968,23220880,22046346,24516025,16397024,23220880,25157968,22046346,24516025,16397024,23220880,22046346,24516025,25157968,26619011,26619011,26619011,22046346,24516025,16397024,25157968,16397024,23220880,25157968,26619011,26619011,26619011,26243863,22046346,24516025,16397024,23220880,25157968,26619011,26619011,26619011,26243863,23220880;CSQ=T|downstream_gene_variant|MODIFIER|MIEN1|ENSG00000141741|Transcript|ENST00000394231|protein_coding||||||||||rs121913471&COSV54062385&COSV54062767&COSV54069186|4409|-1||SNV|HGNC|28230|YES|||||CCDS11344.1|ENSP00000377778|MIEN1_HUMAN||UPI0000043728||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&uncertain_significance&likely_pathogenic|0&1&1&1|1&1&1&1|240332
66&25157968&26619011&16397024&23220880&22046346&16988931&24516025|||||,T|missense_variant|MODERATE|ERBB2|ENSG00000141736|Transcript|ENST00000269571|protein_coding|20/27||||2488/4545|2329/3768|777/1255|V/L|Gtg/Ttg|rs121913471&COSV54062385&COSV54062767&COSV54069186||1||SNV|HGNC|3430|YES|||||CCDS32642.1|ENSP00000269571|ERBB2_HUMAN|Q9NP09_HUMAN&J3QLV2_HUMAN&J3KS21_HUMAN&F5H1T4_HUMAN|UPI000003F55F||1|deleterious(0.05)|benign(0.036)|Gene3D:3.30.200.20&Pfam:PF07714&PIRSF:PIRSF000619&PROSITE_profiles:PS50011&PANTHER:PTHR24416&PANTHER:PTHR24416:SF137&SMART:SM00219&Superfamily:SSF56112&Transmembrane_helices:TMhelix|||||||||||||||||||||1KG_ALL:T:NA|pathogenic&uncertain_significance&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&26619011&16397024&23220880&22046346&16988931&24516025|||||,T|upstream_gene_variant|MODIFIER|MIR4728|ENSG00000265178|Transcript|ENST00000580969|miRNA||||||||||rs121913471&COSV54062385&COSV54062767&COSV54069186|1748|1||SNV|HGNC|41632|YES|||||||||||||||||||||||||||||||||||1KG_ALL:T:NA|pathogenic&uncertain_significance&likely_pathogenic|0&1&1&1|1&1&1&1|24033266&25157968&26619011&16397024&23220880&22046346&16988931&24516025||||| +19 1218493 . A G . . 
DISEASE=lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma,lung_squamous_cell_carcinoma;PMID=17676035,17676035,17676035;CSQ=G|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001153274|CTCF_binding_site||||||||||rs764449808&COSV58821024||||SNV|||||||||||||||||||||||||||1.205e-05|0|0|0|0|0.0001394|0|0|0|0.0001394|gnomAD_FIN|1KG_ALL:G:NA|likely_pathogenic|0&1|1&1|17676035|||||,G|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00001906985|promoter||||||||||rs764449808&COSV58821024||||SNV|||||||||||||||||||||||||||1.205e-05|0|0|0|0|0.0001394|0|0|0|0.0001394|gnomAD_FIN|1KG_ALL:G:NA|likely_pathogenic|0&1|1&1|17676035|||||,G|missense_variant|MODERATE|STK11|ENSG00000118046|Transcript|ENST00000326873|protein_coding|2/10||||1541/3328|368/1302|123/433|Q/R|cAg/cGg|rs764449808&COSV58821024||1||SNV|HGNC|11389|YES|||||CCDS45896.1|ENSP00000324856|STK11_HUMAN|Q9NS52_HUMAN|UPI0000136105||1|deleterious(0)|possibly_damaging(0.776)|PROSITE_profiles:PS50011&PANTHER:PTHR24347:SF1&PANTHER:PTHR24347&Pfam:PF00069&Gene3D:3.30.200.20&SMART:SM00220&Superfamily:SSF56112||||||||||1.205e-05|0|0|0|0|0.0001394|0|0|0|0.0001394|gnomAD_FIN|1KG_ALL:G:NA|likely_pathogenic|0&1|1&1|17676035||||| +19 4117549 . A C . . 
DISEASE=skin_melanoma,gastric_adenocarcinoma,pancreas_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,skin_melanoma,gastric_adenocarcinoma,pancreas_adenocarcinoma,head_and_neck_squamous_cell_carcinoma,skin_melanoma,gastric_adenocarcinoma,pancreas_adenocarcinoma,head_and_neck_squamous_cell_carcinoma;PMID=26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011,26619011;CSQ=C|missense_variant|MODERATE|MAP2K2|ENSG00000126934|Transcript|ENST00000262948|protein_coding|2/11||||425/1734|171/1203|57/400|F/L|ttT/ttG|rs1057519910&CM083721&COSV104543641&COSV53569171&COSV99502205||-1||SNV|HGNC|6842|YES|||||CCDS12120.1|ENSP00000262948|MP2K2_HUMAN|G5E9C7_HUMAN&B3KS97_HUMAN|UPI000012F489||1|tolerated(0.06)|probably_damaging(0.993)|Coiled-coils_(Ncoils):Coil&PANTHER:PTHR24360&PANTHER:PTHR24360:SF15|||||||||||||||||||||1KG_ALL:C:NA|likely_pathogenic|0&0&1&1&1|1&1&1&1&1|26619011||||| +21 36252876 . C G . . DISEASE=acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia;PMID=24374719,24616160,24659740,24374719,24616160,24659740,24374719,24616160,24659740,24374719,24616160,24659740,24374719,24616160,24659740,24374719,24616160,24659740,24374719,24616160,24659740,24374719,24616160,24659740;CSQ=G|missense_variant|MODERATE|RUNX1|ENSG00000159216|Transcript|ENST00000300305|protein_coding|4/8||||931/6222|486/1443|162/480|R/S|agG/agC|rs1057519749&COSV100277991&COSV55879245||-1||SNV|HGNC|10471|YES|||||CCDS13639.1|ENSP00000300305|RUNX1_HUMAN||UPI000015FE6A||1|del
eterious(0)|probably_damaging(0.997)|Gene3D:2.60.40.720&Pfam:PF00853&PIRSF:PIRSF009374&Prints:PR00967&PROSITE_profiles:PS51062&PANTHER:PTHR11950&Superfamily:SSF49417|||||||||||||||||||||1KG_ALL:G:NA|likely_pathogenic|0&1&1|1&1&1|24374719&24616160&24659740||||| +21 44514777 . T G . . DISEASE=acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,myelodysplastic_syndrome,myelodysplastic_syndrome,myelodysplastic_syndrome,myelodysplastic_syndrome,acute_myeloid_leukemia,acute_myeloid_leukemia,acute_myeloid_leukemia,myelodysplastic_syndrome,myelodysplastic_syndrome,acute_myeloid_leukemia,acute_myeloid_leukemia,myelodysplastic_syndrome,myelodysplastic_syndrome,acute_myeloid_leukemia,myelodysplastic_syndrome,myelodysplastic_syndrome,acute_myeloid_leukemia,acute_myeloid_leukemia,myelodysplastic_syndrome,myelodysplastic_syndrome,acute_myeloid_leukemia;PMID=22158538,22158538,22158538,22158538,23029227,23861105,23029227,23861105,23029227,23029227,22158538,23029227,23861105,23029227,22158538,23029227,23861105,23029227,22158538,23861105,23029227,23029227,22158538,23861105,23029227,23029227;CSQ=G|missense_variant|MODERATE|U2AF1|ENSG00000160201|Transcript|ENST00000291552|protein_coding|6/8||||563/962|470/723|157/240|Q/P|cAg/cCg|rs371246226&COSV52341120&COSV52341147||-1||SNV|HGNC|12453|YES|||||CCDS13694.1|ENSP00000291552|U2AF1_HUMAN||UPI0000000C26||1|deleterious(0)|possibly_damaging(0.531)|Pfam:PF00642&PROSITE_profiles:PS50103&PANTHER:PTHR12620&PANTHER:PTHR12620:SF5&SMART:SM00356||||||||||2.387e-05|0|0|0|0|0|5.279e-05|0|0|5.279e-05|gnomAD_NFE|1KG_ALL:G:NA|not_provided&likely_pathogenic|0&1&1|1&1&1|22158538&23029227&23861105||||| diff --git a/tests/test_files/test.vcf.gz b/tests/test_files/test.vcf.gz new file mode 100644 index 0000000..dd91345 Binary files /dev/null and b/tests/test_files/test.vcf.gz differ diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..787aedf --- /dev/null +++ b/tower.yml @@ -0,0 
+1,5 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/variantmtb.nf b/workflows/variantmtb.nf index 9615bbf..4895b40 100644 --- a/workflows/variantmtb.nf +++ b/workflows/variantmtb.nf @@ -1,121 +1,222 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +include { QUERYNATOR_CGIAPI } from '../modules/local/querynator/cgiapi' +include { QUERYNATOR_CIVICAPI } from '../modules/local/querynator/civicapi' +include { QUERYNATOR_CREATEREPORT } from '../modules/local/querynator/createreport' -// Validate input parameters -WorkflowVariantmtb.initialise(params, log) +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main.nf' +include { GUNZIP } from '../modules/nf-core/gunzip/main' +include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix/main' +include { TABIX_BGZIPTABIX } from '../modules/nf-core/tabix/bgziptabix/main' +include { BCFTOOLS_NORM } from '../modules/nf-core/bcftools/norm/main' -// TODO nf-core: Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' 
} +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { getGenomeAttribute } from '../subworkflows/local/utils_nfcore_variantmtb_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +workflow VARIANTMTB { -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + take: + ch_samplesheet // channel: samplesheet read in from --input + ch_versions // channel: versions of the software used in the pipeline, emitted by initialization subworkflow -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + main: -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' + /* + ======================================================================================== + PREPARE INPUT FOR THE DIFFERENT QUERYNATOR QUERIES + ======================================================================================== + */ -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + // CHECK PARAMETERS + if ( params.databases.contains("civic" ) & !params.fasta & !params.genome ) { error("No reference provided! use --genome or --fasta" )} -// Info required for completion email and summary -def multiqc_report = [] + // CHECK SECRETS + if ( params.databases.contains("cgi" ) & System.getenv("NXF_ENABLE_SECRETS") != 'true') { error("Please enable secrets: export NXF_ENABLE_SECRETS='true'")} -workflow VARIANTMTB { + ch_samplesheet + .map { meta, input_file -> + meta["compressed"] = input_file.extension == "gz" ? "compressed" : "uncompressed" + return [ meta, input_file ] } + .set { ch_input } + + // if specified, fetch fasta file from --genome parameter, --fasta has priority + fasta = params.fasta ? Channel.fromPath(params.fasta).collect() : Channel.fromPath(getGenomeAttribute('fasta')).collect() - ch_versions = Channel.empty() + /* + ------------------------ + CGI + ------------------------ + */ - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( + if (params.databases.contains("cgi")) { + + // Separate different filetypes for cgi input (mutations, translocations, cnas) ch_input - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowVariantmtb.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = 
ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect() - ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) -} + .branch { + meta, input_file -> + mutations : meta["filetype"] == 'mutations' + return [ meta, + input_file, + [], + [], + meta["cgi_cancer"], + meta["ref"] + ] + translocations : meta["filetype"] == 'translocations' + return [ meta, + [], + input_file, + [], + meta["cgi_cancer"], + meta["ref"] + ] + cnas : meta["filetype"] == 'cnas' + return [ meta, + [], + [], + input_file, + meta["cgi_cancer"], + meta["ref"] + ] + } + .set { ch_input_filetype_split } + + // Recombine the channels & Create querynator CGI input + ch_input_filetype_split.mutations + .mix (ch_input_filetype_split.translocations, + ch_input_filetype_split.cnas ) + .set { ch_cgi_input } + } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + /* + ------------------------ + CIViC + ------------------------ + */ + + if (params.databases.contains("civic")) { + + ch_input + .branch { meta, input_file -> + compressed_mutations : meta["compressed"] == 'compressed' & meta["filetype"] == 'mutations' + return [ meta, input_file ] + uncompressed_mutations : meta["compressed"] == 'uncompressed' & meta["filetype"] == 'mutations' + return [ meta, input_file ] + } + .set { ch_input_mutation_compressed_split } + + // Tabix compressed files + TABIX_TABIX( ch_input_mutation_compressed_split.compressed_mutations ) + ch_versions = 
ch_versions.mix(TABIX_TABIX.out.versions) + + // bgzip & tabix uncompressed files + TABIX_BGZIPTABIX( ch_input_mutation_compressed_split.uncompressed_mutations ) + + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + + // Recombine tabix & gzipped input + ch_input_mutation_compressed_split.compressed_mutations + .join(TABIX_TABIX.out.tbi) + .set { ch_input_tabix } + + // Recombine the channels & Create input for bcftools norm + TABIX_BGZIPTABIX.out.gz_tbi + .mix(ch_input_tabix) + .map{ meta, input_file, index_file -> + [ meta, input_file, index_file ] } + .set { ch_bcfnorm_input } + + ch_bcfnorm_meta2 = ch_bcfnorm_input + .map{ meta, input_file, index_file -> meta["ref"]} + + // Normalize the vcf input + BCFTOOLS_NORM ( + ch_bcfnorm_input, + ch_bcfnorm_meta2.combine(fasta) + ) + + ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions) + + } + + /* + ======================================================================================== + RUN QUERYNATOR MODULES (CGI & CIViC & CREATE REPORT) + ======================================================================================== + */ + + /* + ------------------------ + CGI + ------------------------ + */ + + if (params.databases.contains("cgi")) { + //MODULE: Run querynator query_cgi + QUERYNATOR_CGIAPI( ch_cgi_input ) + + ch_versions = ch_versions.mix(QUERYNATOR_CGIAPI.out.versions) + } + + /* + ------------------------ + CIViC + ------------------------ + */ + + if (params.databases.contains("civic")) { + + // MODULE: Run querynator query_civic + QUERYNATOR_CIVICAPI( BCFTOOLS_NORM.out.vcf ) -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) + ch_versions = ch_versions.mix(QUERYNATOR_CIVICAPI.out.versions) } - NfcoreTemplate.summary(workflow, params, log) + + + /* + ------------------------ + CREATE REPORT + ------------------------ + */ + + if (params.databases.contains("civic") && 
params.databases.contains("cgi")) { + + QUERYNATOR_CGIAPI.out.result_dir + .join(QUERYNATOR_CIVICAPI.out.result_dir) + .set { ch_report_input } + + QUERYNATOR_CREATEREPORT( ch_report_input ) + + ch_versions = ch_versions.mix(QUERYNATOR_CREATEREPORT.out.versions) + } + + // Collate and save software versions + + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } + + + emit: + + versions = ch_versions // channel: [ path(versions.yml) ] + } + + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END