diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..b290e09 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..72dda28 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,33 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset + +[/assets/email*] +indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7a2dabc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.config linguist-language=nextflow +*.nf.test linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 0000000..191fabd --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..8a3519a --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,123 @@ +# nf-core/deepmodeloptim: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving nf-core/deepmodeloptim. + +We try to manage the required tasks for nf-core/deepmodeloptim using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! +Contributions to the code are even more welcome ;) + +> [!NOTE] +> If you need help using or modifying nf-core/deepmodeloptim then the best place to ask is on the nf-core Slack [#deepmodeloptim](https://nfcore.slack.com/channels/deepmodeloptim) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Contribution workflow + +If you'd like to write some code for nf-core/deepmodeloptim, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [nf-core/deepmodeloptim issues](https://github.com/nf-core/deepmodeloptim/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/deepmodeloptim repository](https://github.com/nf-core/deepmodeloptim) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint ` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regretful event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. + +## Getting help + +For further information/help, please consult the [nf-core/deepmodeloptim documentation](https://nf-co.re/deepmodeloptim/usage) and don't hesitate to get in touch on the nf-core Slack [#deepmodeloptim](https://nfcore.slack.com/channels/deepmodeloptim) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the nf-core/deepmodeloptim code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflow/ci.yml`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_` +- intermediate and terminal channels: `ch__for_` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/deepmodeloptim/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..48f759e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/deepmodeloptim pipeline documentation](https://nf-co.re/deepmodeloptim/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 23.04.0)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/deepmodeloptim _(eg. 1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..e9a9c11 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #deepmodeloptim channel" + url: https://nfcore.slack.com/channels/deepmodeloptim + about: Discussion about the nf-core/deepmodeloptim pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..c140b5b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/deepmodeloptim pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..8a0bf64 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,26 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/deepmodeloptim/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/deepmodeloptim _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core pipelines lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 0000000..c49bbdc --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,56 @@ +name: nf-core AWS full size tests +# This workflow is triggered on PRs opened against the master branch. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. +# It runs the -profile 'test_full' on AWS batch + +on: + pull_request: + branches: + - master + workflow_dispatch: + pull_request_review: + types: [submitted] + +jobs: + run-platform: + name: Run AWS full tests + # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered + if: github.repository == 'nf-core/deepmodeloptim' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - uses: octokit/request-action@v2.x + id: check_approvals + with: + route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - id: test_variables + if: github.event_name != 'workflow_dispatch' + run: | + JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' + CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') + test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + # TODO nf-core: You can customise AWS full pipeline tests as required + # Add full size test data (but still relatively small datasets for few samples) + # on the `test_full.config` test runs with only one set of parameters + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/deepmodeloptim/work-${{ github.sha }} + parameters: | + { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/deepmodeloptim/results-${{ github.sha }}" + } + profiles: test_full + + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml new file mode 100644 index 0000000..efa3e85 --- /dev/null +++ b/.github/workflows/awstest.yml @@ -0,0 +1,33 @@ +name: nf-core AWS test +# This workflow can be triggered manually with the GitHub actions workflow dispatch button. +# It runs the -profile 'test' on AWS batch + +on: + workflow_dispatch: +jobs: + run-platform: + name: Run AWS tests + if: github.repository == 'nf-core/deepmodeloptim' + runs-on: ubuntu-latest + steps: + # Launch workflow using Seqera Platform CLI tool action + - name: Launch workflow via Seqera Platform + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/deepmodeloptim/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/deepmodeloptim/results-test-${{ github.sha }}" + } + profiles: test + + - uses: actions/upload-artifact@v4 + with: + name: Seqera Platform debug log file + path: | + seqera_platform_action_*.log + seqera_platform_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 0000000..2b4dceb --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'nf-core/deepmodeloptim' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/deepmodeloptim ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d4c270f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,85 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + workflow_dispatch: + +env: + NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/deepmodeloptim') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "24.04.2" + - "latest-everything" + profile: + - "conda" + - "docker" + - "singularity" + test_name: + - "test_stub" + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' + run: | + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: "Stub run of the pipeline ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + run: | + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results diff --git a/.github/workflows/ci_nextflow.yml b/.github/workflows/ci_nextflow.yml deleted file mode 100644 index 271c1cb..0000000 --- a/.github/workflows/ci_nextflow.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: pipeline CI -# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - main - pull_request: - release: - types: [published] - -env: - NXF_ANSI_LOG: false - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - name: Run pipeline with test data - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'mathysgrapotte/stimulus') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "23.04.0" - - "latest-everything" - steps: - - name: Check out pipeline code - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - name: Run pipeline with test data - # TODO: Get rid of loca profile when the nf-core like configs are in - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker - - test_stub: - name: Stub run of the pipeline - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'mathysgrapotte/stimulus') }} - runs-on: ubuntu-latest - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with stub-run in alphafold2 split mode - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_stub,docker diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 0000000..0b6b1f2 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..713dc3e --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,119 @@ +name: Test successful pipeline download with 'nf-core pipelines download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." + required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 + with: + apptainer-version: 1.3.4 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + run: | + nf-core pipelines download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration 'yes' + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Count the downloaded number of container images + id: count_initial + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Initial container image count: $image_count" + echo "IMAGE_COUNT_INITIAL=$image_count" >> ${GITHUB_ENV} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results + + - name: Count the downloaded number of container images + id: count_afterwards + run: | + image_count=$(ls -1 ./singularity_container_images | wc -l | xargs) + echo "Post-pipeline run container image count: $image_count" + echo "IMAGE_COUNT_AFTER=$image_count" >> ${GITHUB_ENV} + + - name: Compare container image counts + run: | + if [ "${{ env.IMAGE_COUNT_INITIAL }}" -ne "${{ env.IMAGE_COUNT_AFTER }}" ]; then + initial_count=${{ env.IMAGE_COUNT_INITIAL }} + final_count=${{ env.IMAGE_COUNT_AFTER }} + difference=$((final_count - initial_count)) + echo "$difference additional container images were \n downloaded at runtime . The pipeline has no support for offline runs!" + tree ./singularity_container_images + exit 1 + else + echo "The pipeline can be downloaded successfully!" + fi diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 0000000..4939a8e --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,89 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/deepmodeloptim' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/deepmodeloptim/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..a502573 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,83 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 0000000..42e519b --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 0000000..c6ba35d --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" | sed 's/-//g' >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml new file mode 100644 index 0000000..e8aafe4 --- /dev/null +++ b/.github/workflows/template_version_comment.yml @@ -0,0 +1,46 @@ +name: nf-core template version comment +# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. +# It posts a comment to the PR, even if it comes from a fork. + +on: pull_request_target + +jobs: + template_version: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Read template version from .nf-core.yml + uses: nichmor/minimal-read-yaml@v0.0.2 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install nf-core + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Check nf-core outdated + id: nf_core_outdated + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} + + - name: Post nf-core template version comment + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + if: | + contains(env.OUTPUT, 'nf-core') + with: + repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} + allow-repeats: false + message: | + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. + > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + # diff --git a/.gitignore b/.gitignore index 671442d..83f7959 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,17 @@ -#stuff not needed for commits - +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +null/ bin/**/__pycache__/ /.ipynb_checkpoints /notebook/.ipynb_checkpoints .ipynb_checkpoints -.nextflow* -/work/ -.DS_Store /singularity_cache -/results/ .coverage .vscode/ bin/.vscode/ diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 0000000..4611863 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,17 @@ +image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + #- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..42b703b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,33 @@ +bump_version: null +lint: + files_exist: + - conf/igenomes.config + - conf/igenomes_ignored.config + - assets/multiqc_config.yml + - conf/igenomes.config + - conf/igenomes_ignored.config + - assets/multiqc_config.yml + files_unchanged: + - .github/CONTRIBUTING.md + - assets/sendmail_template.txt + - .github/CONTRIBUTING.md + - assets/sendmail_template.txt + multiqc_config: false +nf_core_version: 3.0.2 +org_path: null repository_type: pipeline +template: + author: Mathys Grapotte + description: nf-core/deepmodeloptim is an end-to-end nextflow based pipeline for + statistically testing training procedures of machine learning models + force: false + is_nfcore: true + name: deepmodeloptim + org: nf-core + outdir: . + skip_features: + - igenomes + - multiqc + - fastqc + version: 1.0.0dev +update: null diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9e9f0e1 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "3.0.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..437d763 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,12 @@ +email_template.html +adaptivecard.json +slackreport.json +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc +bin/ diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 0000000..c81f9a7 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..31e4309 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,16 @@ +# nf-core/deepmodeloptim: Changelog + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## v1.0.0dev - [date] + +Initial release of nf-core/deepmodeloptim, created with the [nf-core](https://nf-co.re/) template. + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 0000000..f3cf692 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,33 @@ +# nf-core/deepmodeloptim: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..c089ec7 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,182 @@ +# Code of Conduct at nf-core (v1.4) + +## Our Pledge + +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: + +- Age +- Ability +- Body size +- Caste +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Neurodiversity +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status + +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. + +## Preamble + +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: + +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. + +## Our Responsibilities + +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. +- Representing nf-core on social media. This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Accept feedback, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) +- Take breaks when you feel like you need them. +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. + +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. + +## Procedures for reporting CoC violations + +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. + +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. + +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. + +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. + +## Attribution and Acknowledgements + +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) + +## Changelog + +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 + +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/Dockerfiles/ps/Dockerfile b/Dockerfiles/ps/Dockerfile index 349d802..54e6acb 100644 --- a/Dockerfiles/ps/Dockerfile +++ b/Dockerfiles/ps/Dockerfile @@ -12,4 +12,4 @@ RUN micromamba install -y -n base -c defaults -c bioconda -c conda-forge \ && micromamba clean -a -y ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" -USER root \ No newline at end of file +USER root diff --git a/Dockerfiles/stimulus/Dockerfile b/Dockerfiles/stimulus/Dockerfile index df3353e..625715d 100644 --- a/Dockerfiles/stimulus/Dockerfile +++ b/Dockerfiles/stimulus/Dockerfile @@ -18,4 +18,4 @@ RUN pip install \ ray[train,tune,default]==2.23.0 \ matplotlib==3.9.0 \ pandas==2.2.0 \ - safetensors==0.4.5 + safetensors==0.4.5 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..788a749 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Mathys Grapotte + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index ad56cd9..ca1fcc6 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,82 @@ -# STIMULUS +

+ + + nf-core/deepmodeloptim + +

-Stochastic Testing and Input Manipulation for Unbiased Learning Systems (STIMULUS) is an end-to-end nextflow based pipeline for statistically testing training procedures of machine learning models. +[![GitHub Actions CI Status](https://github.com/nf-core/deepmodeloptim/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/deepmodeloptim/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/deepmodeloptim/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/deepmodeloptim/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/deepmodeloptim/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/deepmodeloptim) + +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23deepmodeloptim-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/deepmodeloptim)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) + +## Introduction + +**nf-core/deepmodeloptim** is a bioinformatics end-to-end pipeline that performs statistically testing training procedures of machine learning models. Deep learning model development in natural science is an empirical and costly process. Users must define a pre-processing pipeline, an architecture, find the best parameters for said architecture and iterate over this process. Leveraging the power of Nextflow (polyglotism, container integration, scalable on the cloud), we propose STIMULUS, an open-source software built to automatize deep learning model development for genomics. -STIMULUS takes as input a user defined PyTorch model, a dataset, a configuration file to describe the pre-processing steps to be performed, and a range of parameters for the PyTorch model. It then transforms the data according to all possible pre-processing steps, finds the best architecture parameters for each of the transformed datasets, performs sanity checks on the models and train a minimal deep learning version for each dataset/architecture. +STIMULUS takes as input a user defined PyTorch model, a dataset, a configuration file to describe the pre-processing steps to be performed, and a range of parameters for the PyTorch model. It then transforms the data according to all possible pre-processing steps, finds the best architecture parameters for each of the transformed datasets, performs sanity checks on the models and train a minimal deep learning version for each dataset/architecture. Those experiments are then compiled into an intuitive report, making it easier for scientists to pick the best design choice to be sent to large scale training. ![alt text](https://github.com/mathysgrapotte/stimulus/blob/readme/visual_assets/stimulus_overview.png) -# Quick start +## Usage + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + + + +Now, you can run the pipeline using: + + + +```bash +nextflow run nf-core/deepmodeloptim \ + -profile \ + --input samplesheet.csv \ + --outdir +``` + +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files). + +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/deepmodeloptim/usage) and the [parameter documentation](https://nf-co.re/deepmodeloptim/parameters). + +## Pipeline output + +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/deepmodeloptim/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/deepmodeloptim/output). + + ## Code requirements @@ -20,37 +84,36 @@ Those experiments are then compiled into an intuitive report, making it easier f The data is provided as a csv where the header columns are in the following format : name:type:class -*name* is user given (note that it has an impact on experiment definition). +_name_ is user given (note that it has an impact on experiment definition). -*type* is either "input", "meta", or "label". "input" types are fed into the mode, "meta" types are registered but not transformed nor fed into the models and "label" is used as a training label. +_type_ is either "input", "meta", or "label". "input" types are fed into the mode, "meta" types are registered but not transformed nor fed into the models and "label" is used as a training label. -*class* is a supported class of data for which encoding methods have been created, please raise an issue on github or contribute a PR if a class of your interest is not implemented +_class_ is a supported class of data for which encoding methods have been created, please raise an issue on github or contribute a PR if a class of your interest is not implemented #### csv general example | input1:input:input_type | input2:input:input_type | meta1:meta:meta_type | label1:label:label_type | label2:label:label_type | -|-------------------------|-------------------------|----------------------|-------------------------|-------------------------| +| ----------------------- | ----------------------- | -------------------- | ----------------------- | ----------------------- | | sample1 input1 | sample1 input2 | sample1 meta1 | sample1 label1 | sample1 label2 | | sample2 input1 | sample2 input2 | sample2 meta1 | sample2 label1 | sample2 label2 | | sample3 input1 | sample3 input2 | sample3 meta1 | sample3 label1 | sample3 label2 | - #### csv specific example +| mouse_dna:input:dna | mouse_rnaseq:label:float | +| ------------------- | ------------------------ | +| ACTAGGCATGCTAGTCG | 0.53 | +| ACTGGGGCTAGTCGAA | 0.23 | +| GATGTTCTGATGCT | 0.98 | -| mouse_dna:input:dna | mouse_rnaseq:label:float| -|-------------------------|-------------------------| -| ACTAGGCATGCTAGTCG | 0.53 | -| ACTGGGGCTAGTCGAA | 0.23 | -| GATGTTCTGATGCT | 0.98 | - -### Model +### Model In STIMULUS, users input a .py file containing a model written in pytorch (see examples in bin/tests/models) Said models should obey to minor standards: 1. The model class you want to train should start with "Model", there should be exactly one class starting with "Model". + ```python import torch @@ -88,14 +151,14 @@ class ModelClass(nn.Module): # your model definition here pass - def forward(self, mouse_dna): + def forward(self, mouse_dna): output = model_layers(mouse_dna) ``` -3. The model should include a **batch** named function that takes as input a dictionary of input "x", a dictionary of labels "y", a Callable loss function and a callable optimizer. +3. The model should include a **batch** named function that takes as input a dictionary of input "x", a dictionary of labels "y", a Callable loss function and a callable optimizer. -In order to allow **batch** to take as input a Callable loss, we define an extra compute_loss function that parses the correct output to the correct loss class. +In order to allow **batch** to take as input a Callable loss, we define an extra compute_loss function that parses the correct output to the correct loss class. ```python @@ -112,7 +175,7 @@ class ModelClass(nn.Module): # your model definition here pass - def forward(self, mouse_dna): + def forward(self, mouse_dna): output = model_layers(mouse_dna) def compute_loss_mouse_rnaseq(self, output: torch.Tensor, mouse_rnaseq: torch.Tensor, loss_fn: Callable) -> torch.Tensor: @@ -122,10 +185,10 @@ class ModelClass(nn.Module): `mouse_rnaseq` is the target tensor -> label column name. `loss_fn` is the loss function to be used. - IMPORTANT : the input variable "mouse_rnaseq" has the same name as the label defined in the csv above. + IMPORTANT : the input variable "mouse_rnaseq" has the same name as the label defined in the csv above. """ return loss_fn(output, mouse_rnaseq) - + def batch(self, x: dict, y: dict, loss_fn: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]: """ Perform one batch step. @@ -163,9 +226,9 @@ class ModelClass(nn.Module): # your model definition here pass - def forward(self, mouse_dna): + def forward(self, mouse_dna): output = model_layers(mouse_dna) - + def batch(self, x: dict, y: dict, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]: """ Perform one batch step. @@ -190,9 +253,43 @@ class ModelClass(nn.Module): ### Experiment design -The file in which all information about how to handle the data before tuning is called an `experiment_config`. This file in `.json` format for now but it will be soon moved to `.yaml`. So this section could vary in the future. +The file in which all information about how to handle the data before tuning is called an `experiment_config`. This file in `.json` format for now but it will be soon moved to `.yaml`. So this section could vary in the future. -The `experiment_config` is a mandatory input for the pipeline and can be passed with the flag `--exp_conf` followed by the `PATH` of the file you want to use. Two examples of `experiment_config` can be found in the `examples` directory. +The `experiment_config` is a mandatory input for the pipeline and can be passed with the flag `--exp_conf` followed by the `PATH` of the file you want to use. Two examples of `experiment_config` can be found in the `examples` directory. ### Experiment config content description. +## Credits + + + +nf-core/deepmodeloptim was originally written by Mathys Grapotte. + +We thank the following people for their extensive assistance in the development of this pipeline: + + + +## Contributions and Support + +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). + +For further information or help, don't hesitate to get in touch on the [Slack `#deepmodeloptim` channel](https://nfcore.slack.com/channels/deepmodeloptim) (you can join with [this invite](https://nf-co.re/join/slack)). + +## Citations + + + + + + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. + +You can cite the `nf-core` publication as follows: + +> **The nf-core framework for community-curated bioinformatics pipelines.** +> +> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. +> +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..40838b8 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/deepmodeloptim v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html new file mode 100644 index 0000000..52f8cf5 --- /dev/null +++ b/assets/email_template.html @@ -0,0 +1,53 @@ + + + + + + + + nf-core/deepmodeloptim Pipeline Report + + +
+ + + +

nf-core/deepmodeloptim ${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

nf-core/deepmodeloptim execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+ """ +} else { + out << """ +
+ nf-core/deepmodeloptim execution completed successfully! +
+ """ +} +%> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
+ +

nf-core/deepmodeloptim

+

https://github.com/nf-core/deepmodeloptim

+ +
+ + + diff --git a/assets/email_template.txt b/assets/email_template.txt new file mode 100644 index 0000000..64013de --- /dev/null +++ b/assets/email_template.txt @@ -0,0 +1,39 @@ +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/deepmodeloptim ${version} +---------------------------------------------------- +Run Name: $runName + +<% if (success){ + out << "## nf-core/deepmodeloptim execution completed successfully! ##" +} else { + out << """#################################################### +## nf-core/deepmodeloptim execution completed unsuccessfully! ## +#################################################### +The exit status of the task that caused the workflow execution to fail was: $exitStatus. +The full error message was: + +${errorReport} +""" +} %> + + +The workflow was completed at $dateComplete (duration: $duration) + +The command used to launch the workflow was as follows: + + $commandLine + + + +Pipeline Configuration: +----------------------- +<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + +-- +nf-core/deepmodeloptim +https://github.com/nf-core/deepmodeloptim diff --git a/assets/nf-core-deepmodeloptim_logo_light.png b/assets/nf-core-deepmodeloptim_logo_light.png new file mode 100644 index 0000000..3dfa2cf Binary files /dev/null and b/assets/nf-core-deepmodeloptim_logo_light.png differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv new file mode 100644 index 0000000..5f653ab --- /dev/null +++ b/assets/samplesheet.csv @@ -0,0 +1,3 @@ +sample,fastq_1,fastq_2 +SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 0000000..ea7bc14 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,33 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/deepmodeloptim/master/assets/schema_input.json", + "title": "nf-core/deepmodeloptim pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] + }, + "fastq_1": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt new file mode 100644 index 0000000..a477370 --- /dev/null +++ b/assets/sendmail_template.txt @@ -0,0 +1,31 @@ +To: $email +Subject: $subject +Mime-Version: 1.0 +Content-Type: multipart/related;boundary="nfcoremimeboundary" + +--nfcoremimeboundary +Content-Type: text/html; charset=utf-8 + +$email_html + +--nfcoremimeboundary +Content-Type: image/png;name="nf-core-deepmodeloptim_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; filename="nf-core-deepmodeloptim_logo_light.png" + +<% out << new File("$projectDir/assets/nf-core-deepmodeloptim_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +%> + +--nfcoremimeboundary-- diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..759b21e --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/deepmodeloptim ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/tests/test_data/dna_experiment/test.csv b/bin/tests/test_data/dna_experiment/test.csv index bcf7c9c..7e089cd 100644 --- a/bin/tests/test_data/dna_experiment/test.csv +++ b/bin/tests/test_data/dna_experiment/test.csv @@ -1,3 +1,3 @@ hello:input:dna,hola:label:float,pet:meta:str ACTGACTGATCGATGC,12,cat -ACTGACTGATCGATGC,12,dog \ No newline at end of file +ACTGACTGATCGATGC,12,dog diff --git a/bin/tests/test_data/dna_experiment/test_config.json b/bin/tests/test_data/dna_experiment/test_config.json index 352e25e..27fb044 100644 --- a/bin/tests/test_data/dna_experiment/test_config.json +++ b/bin/tests/test_data/dna_experiment/test_config.json @@ -15,7 +15,7 @@ "column_name": "hello:input:dna", "name": "ReverseComplement", "params": {} - } + } ], "split": { "name": "RandomSplitter", diff --git a/bin/tests/test_data/dna_experiment/test_unequal_dna_float.csv b/bin/tests/test_data/dna_experiment/test_unequal_dna_float.csv index f253bef..d1bbea4 100644 --- a/bin/tests/test_data/dna_experiment/test_unequal_dna_float.csv +++ b/bin/tests/test_data/dna_experiment/test_unequal_dna_float.csv @@ -2,4 +2,4 @@ hello:input:dna,hola:label:float ACTGACTGATCGATGC,5 ACTGACTGATCGATGC,5 ATCAGTCAG,2.3 -aTCnndhaksdhjtcgaysdgyagctaggat,10 \ No newline at end of file +aTCnndhaksdhjtcgaysdgyagctaggat,10 diff --git a/bin/tests/test_data/dna_experiment/test_with_split.csv b/bin/tests/test_data/dna_experiment/test_with_split.csv index 9de3a6a..718bbd2 100644 --- a/bin/tests/test_data/dna_experiment/test_with_split.csv +++ b/bin/tests/test_data/dna_experiment/test_with_split.csv @@ -46,4 +46,4 @@ CGGTAGTTCACTGAC,1,2,bird CCGGAAGTTCACTGA,1,2,bird TCCGTAAGTTCACTG,1,2,bird ATCGGTAAGTTCACT,1,2,bird -ATCCGGTAAGTTCAC,1,2,bird \ No newline at end of file +ATCCGGTAAGTTCAC,1,2,bird diff --git a/bin/tests/test_data/prot_dna_experiment/test.csv b/bin/tests/test_data/prot_dna_experiment/test.csv index 2157549..d6cb3b5 100644 --- a/bin/tests/test_data/prot_dna_experiment/test.csv +++ b/bin/tests/test_data/prot_dna_experiment/test.csv @@ -1,3 +1,3 @@ hello:input:dna,bonjour:input:prot,hola:label:float,pet:meta:str ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,cat -ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,dog \ No newline at end of file +ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,dog diff --git a/bin/tests/test_data/prot_dna_experiment/test_config.json b/bin/tests/test_data/prot_dna_experiment/test_config.json index 1a04e20..9979a5c 100644 --- a/bin/tests/test_data/prot_dna_experiment/test_config.json +++ b/bin/tests/test_data/prot_dna_experiment/test_config.json @@ -20,7 +20,7 @@ "column_name": "hello:input:dna", "name": "ReverseComplement", "params": {} - } + } ], "split": { "name": "RandomSplitter", diff --git a/bin/tests/test_data/prot_dna_experiment/test_with_split.csv b/bin/tests/test_data/prot_dna_experiment/test_with_split.csv index ab546e9..a6a1fd5 100644 --- a/bin/tests/test_data/prot_dna_experiment/test_with_split.csv +++ b/bin/tests/test_data/prot_dna_experiment/test_with_split.csv @@ -1,4 +1,4 @@ hello:input:dna,bonjour:input:prot,hola:label:float,split:split:int,pet:meta:str ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,0,cat ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,1,dog -ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,2,bird \ No newline at end of file +ACTGACTGATCGATGC,GPRTTIKAKQLETLK,12,2,bird diff --git a/bin/tests/test_data/titanic/process_titanic_to_stimulus.py b/bin/tests/test_data/titanic/process_titanic_to_stimulus.py index 2c96788..211ad63 100644 --- a/bin/tests/test_data/titanic/process_titanic_to_stimulus.py +++ b/bin/tests/test_data/titanic/process_titanic_to_stimulus.py @@ -5,7 +5,7 @@ def arg_parser(): parser = argparse.ArgumentParser(description="Process Titanic dataset to stimulus format") parser.add_argument("--input", type=str, help="Path to input csv file, should be identical to Kaggle download of the Titanic dataset, see : https://www.kaggle.com/c/titanic/data", required=True) parser.add_argument("--output", type=str, help="Path to output csv file", default="titanic_stimulus.csv", required=False) - return parser.parse_args() + return parser.parse_args() def main(): args = arg_parser() @@ -41,4 +41,4 @@ def main(): df.write_csv(args.output) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/bin/tests/test_data/titanic/titanic_stimulus.json b/bin/tests/test_data/titanic/titanic_stimulus.json index 92e4608..0d3b5f2 100644 --- a/bin/tests/test_data/titanic/titanic_stimulus.json +++ b/bin/tests/test_data/titanic/titanic_stimulus.json @@ -1,6 +1,6 @@ { "experiment": "TitanicExperiment", - "interpret_params_mode": "column_wise", + "interpret_params_mode": "column_wise", "split": [ { "name": "RandomSplitter", diff --git a/bin/tests/test_model/dnatofloat_model.py b/bin/tests/test_model/dnatofloat_model.py index 3f48ae9..fed761e 100644 --- a/bin/tests/test_model/dnatofloat_model.py +++ b/bin/tests/test_model/dnatofloat_model.py @@ -1,14 +1,14 @@ -import torch +import torch import torch.nn as nn from typing import Callable, Optional, Tuple - + class ModelSimple(torch.nn.Module): """ A simple model example. - It takes as input a 1D tensor of any size, - apply some convolutional layer and + It takes as input a 1D tensor of any size, + apply some convolutional layer and outputs a single value using a maxpooling layer and a softmax function. - + All functions `forward`, `compute_loss` and `batch` need to be implemented for any new model. """ def __init__(self, kernel_size: int = 3, pool_size: int = 2): @@ -18,20 +18,20 @@ def __init__(self, kernel_size: int = 3, pool_size: int = 2): self.softmax = nn.Softmax(dim=1) # had to change to 6 because dna sequence is shoprter self.linear = nn.Linear(6, 1) - + def forward(self, hello: torch.Tensor) -> dict: """ Forward pass of the model. It should return the output as a dictionary, with the same keys as `y`. """ - x = hello.permute(0, 2, 1).to(torch.float32) # permute the two last dimensions of hello + x = hello.permute(0, 2, 1).to(torch.float32) # permute the two last dimensions of hello x = self.conv1(x) x = self.pool(x) x = self.softmax(x) x = self.linear(x) x = x.squeeze() return x - + def compute_loss(self, output: torch.Tensor, hola: torch.Tensor, loss_fn: Callable) -> torch.Tensor: """ Compute the loss. @@ -40,14 +40,14 @@ def compute_loss(self, output: torch.Tensor, hola: torch.Tensor, loss_fn: Callab `loss_fn` is the loss function to be used. """ return loss_fn(output, hola.to(torch.float32)) - + def batch(self, x: dict, y: dict, loss_fn1: Callable, loss_fn2: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]: """ Perform one batch step. `x` is a dictionary with the input tensors. `y` is a dictionary with the target tensors. `loss_fn1` and `loss_fn2` are the loss function to be used. - + If `optimizer` is passed, it will perform the optimization step -> training step Otherwise, only return the forward pass output and loss -> evaluation step @@ -62,4 +62,4 @@ def batch(self, x: dict, y: dict, loss_fn1: Callable, loss_fn2: Callable, optimi loss1.backward(retain_graph=True) loss2.backward(retain_graph=True) optimizer.step() - return loss1, output \ No newline at end of file + return loss1, output diff --git a/bin/tests/test_model/titanic_model.py b/bin/tests/test_model/titanic_model.py index 8a5f4d8..426a4ba 100644 --- a/bin/tests/test_model/titanic_model.py +++ b/bin/tests/test_model/titanic_model.py @@ -28,7 +28,7 @@ def forward(self, pclass: torch.Tensor, sex: torch.Tensor, age: torch.Tensor, si x = self.relu(layer(x)) x = self.softmax(self.output_layer(x)) return x - + def compute_loss(self, output: torch.Tensor, survived: torch.Tensor, loss_fn: Callable) -> torch.Tensor: """ Compute the loss. @@ -37,7 +37,7 @@ def compute_loss(self, output: torch.Tensor, survived: torch.Tensor, loss_fn: Ca `loss_fn` is the loss function to be used. """ return loss_fn(output, survived) - + def batch(self, x: dict, y: dict, loss_fn: Callable, optimizer: Optional[Callable] = None) -> Tuple[torch.Tensor, dict]: """ Perform one batch step. diff --git a/conf/base.config b/conf/base.config new file mode 100644 index 0000000..87d7b57 --- /dev/null +++ b/conf/base.config @@ -0,0 +1,62 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/deepmodeloptim Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ + +process { + + // TODO nf-core: Check the defaults for all processes + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // TODO nf-core: Customise requirements for specific processes. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_low { + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } + } + withLabel:process_medium { + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_high { + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_long { + time = { 20.h * task.attempt } + } + withLabel:process_high_memory { + memory = { 200.GB * task.attempt } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } +} diff --git a/conf/crg.config b/conf/crg.config deleted file mode 100644 index a28efd5..0000000 --- a/conf/crg.config +++ /dev/null @@ -1,35 +0,0 @@ -params { - config_profile_name = 'CRG profile' - config_profile_description = 'Configuration to run on CRG cluster' - - max_cpus = 64 - max_memory = 100.GB - max_time = 48.h -} - - -process { - executor = 'crg' - maxRetries = params.max_retries - errorStrategy = params.err_start - - withLabel:process_low { - queue = 'cn-el7,short-centos79' - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - withLabel:process_medium{ - queue = 'cn-el7,short-centos79' - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_high { - queue = 'cn-el7,long-centos79' - cpus = { check_max( 12 , 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } - - } -} diff --git a/conf/crg_slurm.config b/conf/crg_slurm.config deleted file mode 100644 index 724f979..0000000 --- a/conf/crg_slurm.config +++ /dev/null @@ -1,34 +0,0 @@ -params { - config_profile_name = 'CRG profile - slurm new cluster' - config_profile_description = 'Configuration to run on CRG new cluster' - - max_cpus = 64 - max_memory = 100.GB - max_time = 48.h -} - -process { - executor = "slurm" - maxRetries = params.max_retries - errorStrategy = params.err_start - - withLabel:process_low { - clusterOptions = '--qos=shorter' - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } - withLabel:process_medium{ - clusterOptions = '--qos=short' - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_high { - clusterOptions = '--qos=normal' - cpus = { check_max( 12 , 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } - - } -} diff --git a/conf/local.config b/conf/local.config index ee813e3..b1bf94e 100644 --- a/conf/local.config +++ b/conf/local.config @@ -1,27 +1,27 @@ params { - config_profile_name = 'Local profile' - config_profile_description = 'Configuration to run on local machine' + config_profile_name = 'Local profile' + config_profile_description = 'Configuration to run on local machine' } process { - maxRetries = params.max_retries +maxRetries = params.max_retries errorStrategy = params.err_start withLabel:process_low { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } - } + cpus = { 1 } + memory = { 4.GB * task.attempt } + time = { 1.h * task.attempt } + } withLabel:process_medium{ - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 10.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } + cpus = { 4 } + memory = { 10.GB * task.attempt } + time = { 6.h * task.attempt } } withLabel:process_medium_high { - cpus = { check_max( 12 , 'cpus' ) } - memory = { check_max( 50.GB * task.attempt, 'memory' ) } - time = { check_max( 12.h * task.attempt, 'time' ) } + cpus = { 12 } + memory = { 50.GB * task.attempt } + time = { 12.h * task.attempt } } } diff --git a/conf/modules.config b/conf/modules.config index 25aa287..92efa83 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,5 +1,7 @@ -/* config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Available keys to override module options: ext.args = Additional arguments appended to command in module. ext.args2 = Second set of arguments appended to command in module (multi-tool modules). @@ -36,9 +38,9 @@ process { // the prefix is the input data csv filename without last extantion ( all other dots are changed to underscores) + the specific experimental config json name (dynamically created during interpret json step) ext.prefix = { "${combination_key}".split(' ')[0].split('\\.')[0..-2].join("_") + "-" + "${combination_key}".split(' ')[2] } - + // the output directory is called by the unique name of the run + the time of launch. - // all subidrs of the output dir are called with the same schema of the prefix above, which makes them process instance specific (aka unique). + // all subidrs of the output dir are called with the same schema of the prefix above, which makes them process instance specific (aka unique). // at the time of publishing, files are copied and then renamed to a standard naming convention based on their extention and sffixes. This is done through saveAs mode. The files need to be coopied because the original file unrenamed will be further used by the pipeline and it's name has to stay unique (thanks to the prefix), otherwise there wil be filename confilcts. publishDir = [ path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}/".replaceAll('[-:]', '_').split('\\.')[0] + "/" + "${combination_key}".split(' ')[0].split('\\.')[0..-2].join("_") + "-" + "${combination_key}".split(' ')[2] }, @@ -51,15 +53,15 @@ process { else if (filename.endsWith(".csv")) "training_data.csv" else if (filename.endsWith(".json")) "experiment_config.json" else filename - } + } ] } withName: "STIMULUS_ANALYSIS_DEFAULT" { // the output directory is called by the unique name of the run + the time of launch. - // it has subdirs that identify the class of analysises like -> /analysis_default/. - // the subsubdirs have the name of the split informstion used to create the test set. So conceptually all analyses carried on the same test set are under the same subsubdir. + // it has subdirs that identify the class of analysises like -> /analysis_default/. + // the subsubdirs have the name of the split informstion used to create the test set. So conceptually all analyses carried on the same test set are under the same subsubdir. publishDir = [ path: { "${params.outdir}/${workflow.runName}_" + "${workflow.start}/".replaceAll('[-:]', '_').split('\\.')[0] + "/analysis_default/" + "${split_transform_key}" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index 2b13879..24434b7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,6 +15,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline functions' // Input data + // TODO move the data to nf-core/test-datasets csv = "${projectDir}/bin/tests/test_data/dna_experiment/test_with_split.csv" exp_conf = "${projectDir}/examples/test.json" model = "${projectDir}/bin/tests/test_model/dnatofloat_model.py" @@ -27,18 +28,18 @@ process { errorStrategy = params.err_start withLabel:process_low { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 10.m * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 4.GB * task.attempt } + time = { 10.m * task.attempt } } - withLabel:process_medium{ - cpus = { check_max( 2 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 30.m * task.attempt, 'time' ) } + withLabel:process_medium { + cpus = { 2 } + memory = { 6.GB * task.attempt } + time = { 30.m * task.attempt } } withLabel:process_high { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 4 } + memory = { 8.GB * task.attempt } + time = { 1.h * task.attempt } } -} \ No newline at end of file +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 0000000..5826280 --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,24 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/deepmodeloptim -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + + // Fasta references + fasta = params.pipelines_testdata_base_path + 'viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz' +} diff --git a/conf/test_learn.config b/conf/test_learn.config index 0f2df49..31f7217 100644 --- a/conf/test_learn.config +++ b/conf/test_learn.config @@ -6,11 +6,6 @@ params { config_profile_name = 'Test Learn profile' config_profile_description = 'Minimal test dataset to check if a model that should learn actually does' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '8.GB' - max_time = '6.h' - // Input data csv = "${projectDir}/bin/tests/test_data/titanic/titanic_stimulus.csv" exp_conf = "${projectDir}/bin/tests/test_data/titanic/titanic_stimulus.json" // TODO make the data transformations available to titanic data types @@ -19,24 +14,30 @@ params { } -// Limit resources so that this can run on GitHub Actions or take benefit from the check_max function +// Limit resources so that this can run on GitHub Actions process { + resourceLimits = [ + cpus: 2, + memory: 8.GB, + time: 6.h + ] + maxRetries = params.max_retries errorStrategy = params.err_start withLabel:process_low { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 4.GB * task.attempt, 'memory' ) } - time = { check_max( 10.m * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 4.GB * task.attempt } + time = { 10.m * task.attempt } } withLabel:process_medium{ - cpus = { check_max( 2 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 30.m * task.attempt, 'time' ) } + cpus = { 2 } + memory = { 6.GB * task.attempt } + time = { 30.m * task.attempt } } withLabel:process_high { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 4 } + memory = { 8.GB * task.attempt } + time = { 1.h * task.attempt } } } diff --git a/conf/test_stub.config b/conf/test_stub.config index befeb1d..861f494 100644 --- a/conf/test_stub.config +++ b/conf/test_stub.config @@ -6,11 +6,6 @@ stubRun = true params { config_profile_name = 'Test stub profile' config_profile_description = 'stub run to check pipeline function' - - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' // Input data csv = "${projectDir}/bin/tests/test_data/dna_experiment/test.csv" @@ -20,6 +15,13 @@ params { } process { + // Limit resources so that this can run on GitHub Actions + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 6.h + ] + withName: 'INTERPRET_JSON|STIMULUS_NOISE_CSV|STIMULUS_SHUFFLE_CSV|STIMULUS_SPLIT_CSV|TORCH_TRAIN' { container = 'quay.io/biocontainers/gawk:5.1.0' } diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..5ab7937 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,10 @@ +# nf-core/deepmodeloptim: Documentation + +The nf-core/deepmodeloptim documentation is split into the following pages: + +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. + +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/nf-core-deepmodeloptim_logo_dark.png b/docs/images/nf-core-deepmodeloptim_logo_dark.png new file mode 100644 index 0000000..49f0fec Binary files /dev/null and b/docs/images/nf-core-deepmodeloptim_logo_dark.png differ diff --git a/docs/images/nf-core-deepmodeloptim_logo_light.png b/docs/images/nf-core-deepmodeloptim_logo_light.png new file mode 100644 index 0000000..f988159 Binary files /dev/null and b/docs/images/nf-core-deepmodeloptim_logo_light.png differ diff --git a/docs/output.md b/docs/output.md new file mode 100644 index 0000000..4a4b659 --- /dev/null +++ b/docs/output.md @@ -0,0 +1,30 @@ +# nf-core/deepmodeloptim: Output + +## Introduction + +This document describes the output produced by the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. + + + +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + +### Pipeline information + +
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..42ec1b6 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,218 @@ +# nf-core/deepmodeloptim: Usage + +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/deepmodeloptim/usage](https://nf-co.re/deepmodeloptim/usage) + +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ + +## Introduction + + + +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. + +```bash +--input '[path to samplesheet file]' +``` + +### Multiple runs of the same sample + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +``` + +### Full samplesheet + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. + +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. + +```csv title="samplesheet.csv" +sample,fastq_1,fastq_2 +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +``` + +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + +## Running the pipeline + +The typical command for running the pipeline is as follows: + +```bash +nextflow run nf-core/deepmodeloptim --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +``` + +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. + +Note that the pipeline will create the following files in your working directory: + +```bash +work # Directory containing the nextflow working files + # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. +``` + +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. + +:::warning +Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +::: + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run nf-core/deepmodeloptim -profile docker -params-file params.yaml +``` + +with: + +```yaml title="params.yaml" +input: './samplesheet.csv' +outdir: './results/' +genome: 'GRCh37' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + +### Updating the pipeline + +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: + +```bash +nextflow pull nf-core/deepmodeloptim +``` + +### Reproducibility + +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [nf-core/deepmodeloptim releases page](https://github.com/nf-core/deepmodeloptim/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. + +To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. + +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: + +## Core Nextflow arguments + +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: + +### `-profile` + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. + +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + +### `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. + +### `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. + +## Custom configuration + +### Resource requests + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. + +### Custom Containers + +In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. + +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. + +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. + +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. + +### nf-core/configs + +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. + +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). + +## Running in the background + +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. + +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. + +Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs). + +## Nextflow memory requirements + +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/examples/pipeline_generated.json b/examples/pipeline_generated.json index 7531ac7..fede8da 100644 --- a/examples/pipeline_generated.json +++ b/examples/pipeline_generated.json @@ -3,30 +3,26 @@ "transform": [ [ { - "column_name": "hello:input:dna", "name": "UniformTextMasker", - "params": {"probability": 0.1} + "params": { "probability": 0.1 } }, { - "column_name": "bonjour:input:prot", "name": "UniformTextMasker", - "params": {"probability": 0.4} + "params": { "probability": 0.4 } } ], [ { "column_name": "hola:label:float", "name": "GaussianNoise", - "params": {"mean": 0.5, "std": 0.1} + "params": { "mean": 0.5, "std": 0.1 } } - ] + ] ], - "split": - { - "name": "RandomSplitter", - "params": {"split": [0.6, 0.2, 0.2]} - } - -} \ No newline at end of file + "split": { + "name": "RandomSplitter", + "params": { "split": [0.6, 0.2, 0.2] } + } +} diff --git a/examples/test.json b/examples/test.json index 8aa39c7..868e233 100644 --- a/examples/test.json +++ b/examples/test.json @@ -1,18 +1,18 @@ { "experiment": "DnaToFloatExperiment", - "interpret_params_mode": "column_wise", + "interpret_params_mode": "column_wise", "transform": [ { "column_name": "hello:input:dna", "name": ["UniformTextMasker", "ReverseComplement"], - "params": [{"probability": [0.2], "seed": [42]}, "default"] + "params": [{ "probability": [0.2], "seed": [42] }, "default"] }, null ], "split": [ { "name": "RandomSplitter", - "params": [{"split": [[0.34, 0.33, 0.33]], "seed": [42]}] + "params": [{ "split": [[0.34, 0.33, 0.33]], "seed": [42] }] }, null ] diff --git a/examples/user_given.json b/examples/user_given.json index 82e5a85..e671480 100644 --- a/examples/user_given.json +++ b/examples/user_given.json @@ -1,11 +1,15 @@ { "experiment": "DnaToFloatExperiment", - "interpret_parmas_mode": "culumn_wise", + "interpret_parmas_mode": "culumn_wise", "transform": [ { "column_name": "hello:input1:dna", "name": ["UniformTextMasker", "AnotherNoiser", "AnotherNoiser"], - "params": [{"probability": [0.1, 0.2, 0.3]}, {"probability": [0.11, 0.21, 0.31]}, {"probability": [0.12, 0.22, 0.32]}] + "params": [ + { "probability": [0.1, 0.2, 0.3] }, + { "probability": [0.11, 0.21, 0.31] }, + { "probability": [0.12, 0.22, 0.32] } + ] }, { "column_name": "hello:input2:prot", @@ -15,13 +19,20 @@ { "column_name": "hola:label:float", "name": "GaussianNoise", - "params": [{"mean": [0.5, 0.6, 0.7], "std": [0.1, 0.2, 0.3]}] + "params": [{ "mean": [0.5, 0.6, 0.7], "std": [0.1, 0.2, 0.3] }] } ], "split": [ { "name": "RandomSplitter", - "params": [{"split": [[0.6, 0.2, 0.2], [0.7, 0.15, 0.15]]}] + "params": [ + { + "split": [ + [0.6, 0.2, 0.2], + [0.7, 0.15, 0.15] + ] + } + ] }, { "name": "SomeSplitter", @@ -34,33 +45,31 @@ { "column_name": "hello:input:dna", "name": "UniformTextMasker", - "params": {"probability": 0.1} + "params": { "probability": 0.1 } }, { "column_name": "hola:label:float", "name": "GaussianNoise", - "params": {"mean": 0.4, "std": 0.15} + "params": { "mean": 0.4, "std": 0.15 } } ], - "split": - { - "name": "RandomSplitter", - "params": {"split": [0.6, 0.4, 0]} - } + "split": { + "name": "RandomSplitter", + "params": { "split": [0.6, 0.4, 0] } + } }, { "transform": [ { "column_name": "hello:input:dna", "name": "UniformTextMasker", - "params": {"probability": 0.1} + "params": { "probability": 0.1 } } ], - "split": - { - "name": "RandomSplitter", - "params": {"split": [0.6, 0.3, 0.1]} - } + "split": { + "name": "RandomSplitter", + "params": { "split": [0.6, 0.3, 0.1] } + } } ] -} \ No newline at end of file +} diff --git a/main.nf b/main.nf index 90ed407..f8a4eb8 100644 --- a/main.nf +++ b/main.nf @@ -1,93 +1,106 @@ #!/usr/bin/env nextflow /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + nf-core/deepmodeloptim ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/deepmodeloptim + Website: https://nf-co.re/deepmodeloptim + Slack : https://nfcore.slack.com/channels/deepmodeloptim +---------------------------------------------------------------------------------------- */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - log.info paramsHelp("nextflow run main.nf --csv my_data.csv --exp_conf exp.yaml --model my_model.py --tune_conf tune.yaml -profie your_profile ") - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - input handling + RUN ALL WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - +include { DEEPMODELOPTIM } from './workflows/deepmodeloptim' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_deepmodeloptim_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_deepmodeloptim_pipeline' /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CHECK_MODEL } from './subworkflows/check_model.nf' -include { HANDLE_DATA } from './workflows/handle_data.nf' -include { HANDLE_TUNE } from './workflows/handle_tune.nf' -include { HANDLE_ANALYSIS } from './workflows/handle_analysis.nf' +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// +workflow NFCORE_DEEPMODELOPTIM { + + take: + // samplesheet // channel: samplesheet read in from --input + csv + exp_conf + model + tune_conf + initial_weights + + main: + + // + // WORKFLOW: Run pipeline + // + DEEPMODELOPTIM ( + // samplesheet, + csv, + exp_conf, + model, + tune_conf, + initial_weights + ) +} /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - workflow { - CHECK_MODEL ( - params.csv, - params.exp_conf, - params.model, - params.tune_conf, - params.initial_weights + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.validate_params, + params.monochrome_logs, + args, + params.outdir //, + // params.input ) - completion_message = CHECK_MODEL.out.completion_message - HANDLE_DATA( + // + // WORKFLOW: Run main workflow + // + NFCORE_DEEPMODELOPTIM ( + // PIPELINE_INITIALISATION.out.samplesheet, params.csv, params.exp_conf, - completion_message - ) - prepared_data = HANDLE_DATA.out.data - //HANDLE_DATA.out.data.view() - - HANDLE_TUNE( params.model, params.tune_conf, - prepared_data, params.initial_weights ) - //HANDLE_TUNE.out.model.view() - //HANDLE_TUNE.out.tune_out.view() - - // this part works, but the docker container is not updated with matplotlib yet - HANDLE_ANALYSIS( - HANDLE_TUNE.out.tune_out, - HANDLE_TUNE.out.model - ) - -} - -workflow.onComplete { - println "Done" + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url + ) } /* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - diff --git a/modules.json b/modules.json new file mode 100644 index 0000000..be65d1b --- /dev/null +++ b/modules.json @@ -0,0 +1,30 @@ +{ + "name": "nf-core/deepmodeloptim", + "homePage": "https://github.com/nf-core/deepmodeloptim", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": {} + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "installed_by": ["subworkflows"] + }, + "utils_nfschema_plugin": { + "branch": "master", + "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", + "installed_by": ["subworkflows"] + } + } + } + } + } +} diff --git a/modules/local/check_torch_model.nf b/modules/local/check_torch_model.nf index 929623f..75caac7 100644 --- a/modules/local/check_torch_model.nf +++ b/modules/local/check_torch_model.nf @@ -3,8 +3,9 @@ process CHECK_TORCH_MODEL { tag "$experiment_config - $original_csv" label 'process_medium' - container "mathysgrapotte/stimulus-py:latest" - + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" + input: tuple path(original_csv), path(model), path(experiment_config), path(ray_tune_config), path(initial_weights) diff --git a/modules/local/interpret_json.nf b/modules/local/interpret_json.nf index 464d3cc..e2fac07 100644 --- a/modules/local/interpret_json.nf +++ b/modules/local/interpret_json.nf @@ -3,8 +3,9 @@ process INTERPRET_JSON { tag "$user_json" label 'process_low' - container "mathysgrapotte/stimulus-py:latest" - + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" + input: path user_json val message_from_check_model // only here to ensure that this module waits for check_model module to actually run @@ -16,7 +17,7 @@ process INTERPRET_JSON { script: """ - stimulus-interpret-json -j ${user_json} + stimulus-interpret-json -j ${user_json} """ stub: diff --git a/modules/local/stimulus_analysis_default.nf b/modules/local/stimulus_analysis_default.nf index a2ecf11..5b9e59c 100644 --- a/modules/local/stimulus_analysis_default.nf +++ b/modules/local/stimulus_analysis_default.nf @@ -3,17 +3,11 @@ process STIMULUS_ANALYSIS_DEFAULT { tag "${model} - ${split_transform_key}" label 'process_medium' - container "mathysgrapotte/stimulus-py:latest" + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" input: - tuple val(split_transform_key), \ - val(combination_key), \ - path(data), \ - path(experiment_config), \ - path(model_config), \ - path(weights), \ - path(optimizer), \ - path(metrics) + tuple val(split_transform_key), val(combination_key), path(data), path(experiment_config), path(model_config), path(weights), path(optimizer), path(metrics) path(model) output: diff --git a/modules/local/stimulus_shuffle_csv.nf b/modules/local/stimulus_shuffle_csv.nf index c8c433a..e1f8ad0 100644 --- a/modules/local/stimulus_shuffle_csv.nf +++ b/modules/local/stimulus_shuffle_csv.nf @@ -3,7 +3,8 @@ process STIMULUS_SHUFFLE_CSV { tag "${original_csv} - shuffles" label 'process_medium' - container "mathysgrapotte/stimulus-py:latest" + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" input: tuple val(split_transform_key), path(splitted_csv), path(split_json), path(original_csv) diff --git a/modules/local/stimulus_split_csv.nf b/modules/local/stimulus_split_csv.nf index 0d4accd..a9148e2 100644 --- a/modules/local/stimulus_split_csv.nf +++ b/modules/local/stimulus_split_csv.nf @@ -1,9 +1,10 @@ process STIMULUS_SPLIT_CSV { - + tag "${original_csv} - ${split_transform_key}" label 'process_low' - container "mathysgrapotte/stimulus-py:latest" + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" input: tuple val(split_transform_key), path(split_json), path(original_csv) diff --git a/modules/local/stimulus_transform_csv.nf b/modules/local/stimulus_transform_csv.nf index 007d201..994e045 100644 --- a/modules/local/stimulus_transform_csv.nf +++ b/modules/local/stimulus_transform_csv.nf @@ -3,7 +3,8 @@ process STIMULUS_TRANSFORM_CSV { tag "${original_csv} - ${combination_key}" label 'process_medium' - container "mathysgrapotte/stimulus-py:latest" + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" input: tuple val(split_transform_key), val(combination_key), path(transform_json), path(splitted_csv), path(split_json), path(original_csv) diff --git a/modules/local/torch_tune.nf b/modules/local/torch_tune.nf index 783f597..7b4c893 100644 --- a/modules/local/torch_tune.nf +++ b/modules/local/torch_tune.nf @@ -3,30 +3,22 @@ process TORCH_TUNE { tag "${model} - ${combination_key}" label 'process_high' - container "mathysgrapotte/stimulus-py:latest" + // TODO: push image to nf-core quay.io + container "docker.io/mathysgrapotte/stimulus-py:latest" input: tuple val(combination_key), val(split_transform_key), path(ray_tune_config), path(model), path(data_csv), path(experiment_config), path(initial_weights) output: - tuple val(split_transform_key), - val(combination_key), - path(data_csv), - path(experiment_config), - path("*-config.json"), - path("*-model.safetensors"), - path("*-optimizer.pt"), - path("*-metrics.csv"), - path(initial_weights), - emit: tune_specs + tuple val(split_transform_key), val(combination_key), path(data_csv), path(experiment_config), path("*-config.json"), path("*-model.safetensors"), path("*-optimizer.pt"), path("*-metrics.csv"), path(initial_weights), emit: tune_specs + // output the debug files if they are present, making this an optional channel tuple val("debug_${prefix}"), - path("ray_results/*/debug/best_model_*.txt"), - path("ray_results/*/debug/worker_with_seed_*/model.safetensors"), - path("ray_results/*/debug/worker_with_seed_*/seeds.txt"), - emit: debug, - optional: true - + path("ray_results/*/debug/best_model_*.txt"), + path("ray_results/*/debug/worker_with_seed_*/model.safetensors"), + path("ray_results/*/debug/worker_with_seed_*/seeds.txt"), + emit: debug, + optional: true script: // prefix should be global so that is seen in the output section diff --git a/nextflow.config b/nextflow.config index 790df96..9247834 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,11 +1,21 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/deepmodeloptim Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ + +// Global default params, used in configs params { - // Inputs + // Input options + input = null // TODO either change one of the params above by input or create a samplesheet containing the info of them csv = null // the input file containing all input data model = null // the model file in python, the model that will be tested by this pipeline exp_conf = null // the json config file that specifies all the parameters relative to the data manipulation tune_conf = null // the config file with all the hyperparameter directives (choiches) and all ray tune specs - + // Optional inputs initial_weights = null // the initial weights of the model. These files can be used to start the training instead of random initialization. One can provide several files, each of them will be used for a different run. @@ -14,10 +24,11 @@ params { publish_dir_mode = "copy" // Computational resources - max_cpus = 12 // this flasg and the following are for regulating resources, profiles can overwrite these. - max_gpus = 1 // requesting the gpus for the tuning steps. - max_memory = 32.GB - max_time = "72.h" + max_gpus = 1 // requesting the gpus for the tuning steps. + // TODO this should not be in the nextflow config but in each config + // max_cpus = 12 // this flasg and the following are for regulating resources, profiles can overwrite these. + // max_memory = 32.GB + // max_time = "72.h" // Error options max_retries = 0 @@ -28,125 +39,255 @@ params { check_model_num_samples = null // optional flag to do a more extensive check during check_model. This will override user given num_sample value for the tune run. This will give the user control on how extensive it wants the check to be. shuffle = true // flag to tell wether to shuffle or not the data and run a train on it. Sanity check always run on default. (If the way we think at shuffle change maybe is better to remove this flag and make it into a parameter of the user given json for noise nad split) debug_mode = false // flag used to switch to debug mode for the pipeline. - + // General - singularity_cache_dir = "singularity_cache" help = false validate_params = true // tells wether or not to validate input values using nf-schema. + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + help_full = false + show_hidden = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + // Config options config_profile_name = null config_profile_description = null + + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Schema validation default options + validate_params = true } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } docker { - docker.enabled = true - docker.runOptions = '-u $(id -u):$(id -g)' - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } - apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - apptainer.cacheDir = "${params.singularity_cache_dir}" - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - singularity.cacheDir = "${params.singularity_cache_dir}" - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } - debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - process.debug = true - cleanup = false - nextflow.enable.configProcessNamesValidation = true + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } - crg { includeConfig "conf/crg.config" } - crg_slurm { includeConfig "conf/crg_slurm.config" } test { includeConfig "conf/test.config" } test_learn { includeConfig "conf/test_learn.config" } test_stub { includeConfig "conf/test_stub.config" } local { includeConfig "conf/local.config" } + test_full { includeConfig 'conf/test_full.config' } } +// Load nf-core custom profiles from different Institutions +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" -// Nextflow plugins -plugins { - id 'nf-schema@2.0.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet +// Load nf-core/deepmodeloptim custom profiles from different institutions. +// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs +// TODO This line can be uncommented once you move the pipeline to nf-core. A file deepmodeloptim.config will be automatically created at the nf-core/configs repo. This is to allow having a pipeline specific config. +// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/deepmodeloptim.config" : "/dev/null" + +// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +charliecloud.registry = 'quay.io' + + + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. + +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } +// Set bash options +process.shell = """\ +bash + +set -e # Exit if a tool returns a non-zero status/exit code +set -u # Treat unset variables and parameters as an error +set -o pipefail # Returns the status of the last command to exit with a non-zero status or zero if all successfully execute +set -C # No clobber - prevent output redirection from overwriting files. +""" + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false -// trace/report options -// this will allow the pipeline to create tracing/report files with all the steps and the time/memory/cpu they took def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') -def trace_dir = "${params.outdir}/pipeline_info" timeline { enabled = true - file = "${trace_dir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${trace_dir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${trace_dir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${trace_dir}/execution_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } +manifest { + name = 'nf-core/deepmodeloptim' + author = """Mathys Grapotte""" + homePage = 'https://github.com/nf-core/deepmodeloptim' + description = """nf-core/deepmodeloptim is an end-to-end nextflow based pipeline for statistically testing training procedures of machine learning models""" + mainScript = 'main.nf' + nextflowVersion = '!>=24.04.2' + version = '1.0.0dev' + doi = '' +} + +// Nextflow plugins +plugins { + id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +validation { + defaultIgnoreParams = ["genomes"] + help { + enabled = true + command = "nextflow run $manifest.name -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m ${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/${manifest.name}/blob/master/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText } } + +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d0d0c3..e41d30d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,237 +1,294 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/nextflow-io/rnaseq-nf/master/nextflow_schema.json", - "title": "stimulus pipeline parameters", - "description": "Pipeline for statistically testing training procedures of machine learning models", - "type": "object", - "definitions": { - "input_files_options": { - "title": "Input files options", - "type": "object", - "fa_icon": "fas fa-terminal", - "description": "Define where the pipeline should find input data.", - "properties": { - "csv": { - "type": "string", - "format": "file-path", - "description": "Test data as CSV file", - "fa_icon": "fas fa-folder-open", - "mimetype": "tesxt/csv", - "help_text": "the input file containing all input data" + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/nf-core/deepmodeloptim/master/nextflow_schema.json", + "title": "nf-core/deepmodeloptim pipeline parameters", + "description": "nf-core/deepmodeloptim is an end-to-end nextflow based pipeline for statistically testing training procedures of machine learning models", + "required": ["csv", "exp_conf", "model", "tune_conf", "outdir"], + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input files options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data.", + "properties": { + "input": { + "type": "string", + "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/proteinfold/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" + }, + "csv": { + "type": "string", + "format": "file-path", + "description": "Test data as CSV file", + "fa_icon": "fas fa-folder-open", + "mimetype": "tesxt/csv", + "help_text": "the input file containing all input data" + }, + "exp_conf": { + "type": "string", + "format": "file-path", + "description": "Experiment config as JSON format", + "fa_icon": "fas fa-folder-open", + "help_text": "the json config file that specifies all the parameters relative to the data manipulation." + }, + "model": { + "type": "string", + "format": "file-path", + "description": "Model file in Python", + "fa_icon": "fas fa-folder-open", + "mimetype": "text/py", + "help_text": "the model file in python, the model that will be tested by this pipeline." + }, + "tune_conf": { + "type": "string", + "format": "file-path", + "description": "Tuning config in yaml format", + "fa_icon": "fas fa-folder-open", + "mimetype": "text/yaml", + "help_text": "the config file with all the hyperparameter directives (choiches) and all ray tune specs." + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open", + "help_text": "The directory will contain a subdirectory with a name unique to each stimulus pipeline run." + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + } + } }, - "exp_conf": { - "type": "string", - "format": "file-path", - "description": "Experiment config as JSON format", - "fa_icon": "fas fa-folder-open", - "help_text": "the json config file that specifies all the parameters relative to the data manipulation." + "optional_inputs": { + "title": "Optional inputs", + "type": "object", + "description": "Files that can be provided as optional input or omitted.", + "default": "", + "fa_icon": "fas fa-terminal", + "properties": { + "initial_weights": { + "type": "string", + "fa_icon": "fas fa-folder-open", + "help_text": "The initial weights of the model. These files can be used to start the training instead of random initialization. One can provide several files, each of them will be used for a different run.", + "description": "File to be used to initialize the model in tuning", + "format": "path" + } + } }, - "model": { - "type": "string", - "format": "file-path", - "description": "Model file in Python", - "fa_icon": "fas fa-folder-open", - "mimetype": "text/py", - "help_text": "the model file in python, the model that will be tested by this pipeline." + "resources_options": { + "title": "Resources options", + "type": "object", + "description": "Specify maximun processes resources", + "default": "", + "properties": { + "max_gpus": { + "type": "integer", + "default": 1, + "minimum": 0, + "help_text": "requesting the gpus for the tuning steps.", + "description": "set maximum GPU limit" + } + } }, - "tune_conf": { - "type": "string", - "format": "file-path", - "description": "Tuning config in yaml format", - "fa_icon": "fas fa-folder-open", - "mimetype": "text/yaml", - "help_text": "the config file with all the hyperparameter directives (choiches) and all ray tune specs." - } - }, - "required": ["csv", "exp_conf", "model", "tune_conf"] - }, - "optional_inputs": { - "title": "Optional inputs", - "type": "object", - "description": "files that can be omitted", - "default": "", - "fa_icon": "fas fa-terminal", - "properties": { - "initial_weights": { - "type": "string", - "fa_icon": "fas fa-folder-open", - "help_text": "the initial weights of the model. These files can be used to start the training instead of random initialization. One can provide several files, each of them will be used for a different run.", - "description": "file to be used to initialize the miodel in tuning", - "format": "path" - } - } - }, - "output_options": { - "title": "Output options", - "type": "object", - "description": "Define where and how to publish", - "default": "", - "fa_icon": "fas fa-terminal", - "properties": { - "outdir": { - "type": "string", - "default": "./results/", - "description": "output directory", - "help_text": "The directory will contain a subdirectory with a name unique to each stimulus pipeline run.", - "fa_icon": "fas fa-folder-open" + "on_error_options": { + "title": "On error options", + "type": "object", + "description": "What to do and how to handle errors", + "default": "", + "fa_icon": "fas fa-terminal", + "properties": { + "err_start": { + "type": "string", + "default": "finish", + "description": "Tells the pipeline how to behave on error", + "help_text": "refer to nextflow errorStrategy documentation for more details." + }, + "max_retries": { + "type": "integer", + "default": 0, + "description": "number of time to retry if err_strat is\u00a0set to retry", + "help_text": "this also acts as a multiplier for recources request. If it failed for lack of resources it automaticly asks more the second time. take a look at test.conf for more details." + } + } }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "publish miode" - } - } - }, - "resorces_options": { - "title": "Resorces options", - "type": "object", - "description": "Specify maximun processes resources", - "default": "", - "properties": { - "max_cpus": { - "type": "integer", - "default": 12, - "minimum": 1, - "description": "set maximum CPU limit" + "skip_options": { + "title": "Skip options", + "type": "object", + "description": "options to skip or change bhaviour of pipeline", + "default": "", + "fa_icon": "fas fa-terminal", + "properties": { + "check_model": { + "type": "boolean", + "default": true, + "description": "checks if all input are comatible and the model can be tuned.", + "help_text": "flag to tell whether to check or not if the model can be tuned and trained. It does one call of the batch function, (predicting), of the model importing and using everything needed for that. Default run such a check." + }, + "check_model_num_samples": { + "type": "string", + "description": "optional flag to do a more/less extensive check during check_model.", + "help_text": "This will override user given num_sample value for the tune run. This will give the user control on how extensive it wants the check to be. by default is going to be set to 3." + }, + "shuffle": { + "type": "boolean", + "default": true, + "description": "run the shuffle sanity check", + "help_text": "flag to tell wether to shuffle or not the data and run a train on it. Sanity check always run on default. " + }, + "debug_mode": { + "type": "boolean", + "description": "developer flag", + "help_text": "flag used to switch to debug mode for the pipeline. more verbose outputs." + } + } }, - "max_gpus": { - "type": "integer", - "default": 1, - "minimum": 0, - "help_text": "requesting the gpus for the tuning steps.", - "description": "set maximum GPU limit" + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + } + } }, - "max_memory": { - "type": "string", - "default": "32 GB", - "description": "set maximum memory" - }, - "max_time": { - "type": "string", - "default": "72.h", - "description": "set maximum running time" + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", + "hidden": true + }, + "plaintext_email": { + "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true + } + } } - }, - "help_text": "The process specify the resources through the label and the config. But each resource type is then checked against the max value specified here, if it is bigger then the value specified in the corresponding max_ is used instead. This happens through the check_max custom function present in the main nextflow.config.", - "fa_icon": "fas fa-terminal" }, - "on_error_options": { - "title": "On error options", - "type": "object", - "description": "What to do and how to handle errors", - "default": "", - "fa_icon": "fas fa-terminal", - "properties": { - "err_start": { - "type": "string", - "default": "finish", - "description": "Tells the pipeline how to behave on error", - "help_text": "refer to nextflow errorStrategy documentation for more details." + "allOf": [ + { + "$ref": "#/$defs/input_output_options" }, - "max_retries": { - "type": "integer", - "default": 0, - "description": "number of time to retry if err_strat is\u00a0set to retry", - "help_text": "this also acts as a multiplier for recources request. If it failed for lack of resources it automaticly asks more the second time. take a look at test.conf for more details." - } - } - }, - "skip_options": { - "title": "Skip options", - "type": "object", - "description": "options to skip or change bhaviour of pipeline", - "default": "", - "fa_icon": "fas fa-terminal", - "properties": { - "check_model": { - "type": "boolean", - "default": true, - "description": "checks if all input are comatible and the model can be tuned.", - "help_text": "flag to tell whether to check or not if the model can be tuned and trained. It does one call of the batch function, (predicting), of the model importing and using everything needed for that. Default run such a check." + { + "$ref": "#/$defs/optional_inputs" }, - "check_model_num_samples": { - "type": "string", - "description": "optional flag to do a more/less extensive check during check_model.", - "help_text": "This will override user given num_sample value for the tune run. This will give the user control on how extensive it wants the check to be. by default is going to be set to 3." + { + "$ref": "#/$defs/resources_options" }, - "shuffle": { - "type": "boolean", - "default": true, - "description": "run the shuffle sanity check", - "help_text": "flag to tell wether to shuffle or not the data and run a train on it. Sanity check always run on default. " + { + "$ref": "#/$defs/on_error_options" }, - "debug_mode": { - "type": "boolean", - "description": "developer flag", - "help_text": "flag used to switch to debug mode for the pipeline. more verbose outputs." - } - } - }, - "general_options": { - "title": "General options", - "type": "object", - "description": "generic options", - "default": "", - "fa_icon": "fas fa-terminal", - "properties": { - "singularity_cache_dir": { - "type": "string", - "default": "singularity_cache", - "description": "the directory where singularity images will be placed" + { + "$ref": "#/$defs/skip_options" }, - "help": { - "type": "boolean", - "description": "prints this help section" + + { + "$ref": "#/$defs/institutional_config_options" }, - "validate_params": { - "type": "boolean", - "description": "to validate or not the input params", - "default": true + { + "$ref": "#/$defs/generic_options" } - } - }, - "config_options": { - "title": "Config options", - "type": "object", - "description": "options specific for config files", - "default": "", - "properties": { - "config_profile_name": { - "type": "string", - "description": "the name of the config used" - }, - "config_profile_description": { - "type": "string", - "description": "the description of the config fil,e" - } - }, - "fa_icon": "fas fa-terminal" - } - }, - "allOf": [ - { - "$ref": "#/definitions/input_files_options" - }, - { - "$ref": "#/definitions/optional_inputs" - }, - { - "$ref": "#/definitions/output_options" - }, - { - "$ref": "#/definitions/resorces_options" - }, - { - "$ref": "#/definitions/on_error_options" - }, - { - "$ref": "#/definitions/skip_options" - }, - { - "$ref": "#/definitions/general_options" - }, - { - "$ref": "#/definitions/config_options" - } - ] + ] } diff --git a/subworkflows/check_model.nf b/subworkflows/local/check_model/main.nf similarity index 87% rename from subworkflows/check_model.nf rename to subworkflows/local/check_model/main.nf index ff555db..9079db0 100644 --- a/subworkflows/check_model.nf +++ b/subworkflows/local/check_model/main.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { CHECK_TORCH_MODEL } from '../modules/local/check_torch_model.nf' +include { CHECK_TORCH_MODEL } from '../../../modules/local/check_torch_model.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -20,20 +20,19 @@ workflow CHECK_MODEL { input_model input_tune_config input_initial_weights - + main: completion_message = "\n###\nThe check of the model has been skipped.\n###\n" - // It can be still skipped, but by default is run. + // It can be still skipped, but by default is run. if ( params.check_model ) { - + // put the files in channels csv = Channel.fromPath( input_csv ) model = Channel.fromPath( input_model ).first() // TODO implement a check and complain if more that one file is pased as model - json = Channel.fromPath( input_json ) + json = Channel.fromPath( input_json ) tune_config = Channel.fromPath( input_tune_config ) - // combine everything toghether in a all vs all manner model_tuple = csv.combine(model) @@ -47,11 +46,11 @@ workflow CHECK_MODEL { initial_weights = Channel.fromPath( input_initial_weights ) model_tuple = model_tuple.combine(initial_weights) } - + // launch the check using torch. TODO put selection of module based on type: torc, tensorflow ecc.. - CHECK_TORCH_MODEL( model_tuple ) + CHECK_TORCH_MODEL( model_tuple ) completion_message = CHECK_TORCH_MODEL.out.standardout - + } // to enforce that the second run workflow depends on this one @@ -60,7 +59,6 @@ workflow CHECK_MODEL { } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/workflows/handle_analysis.nf b/subworkflows/local/handle_analysis/main.nf similarity index 90% rename from workflows/handle_analysis.nf rename to subworkflows/local/handle_analysis/main.nf index 0a8800e..544be47 100644 --- a/workflows/handle_analysis.nf +++ b/subworkflows/local/handle_analysis/main.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { STIMULUS_ANALYSIS_DEFAULT } from '../modules/local/stimulus_analysis_default' +include { STIMULUS_ANALYSIS_DEFAULT } from '../../../modules/local/stimulus_analysis_default' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -19,19 +19,19 @@ workflow HANDLE_ANALYSIS { input_model main: - + // 1. Run the default analysis for all models and data together. group them by the same test set -> aka same split process input_tune_out .map{it -> [it[0], it[1], it[2], it[3], it[4], it[5], it[6], it[7]]} // cannot work with empty initial_weights now. TODO fix this in the future .groupTuple() .set{ ch2default } ch2default.view() - + STIMULUS_ANALYSIS_DEFAULT( ch2default, input_model ) // 2. Run the motif discovery block - -} \ No newline at end of file + +} diff --git a/workflows/handle_data.nf b/subworkflows/local/handle_data/main.nf similarity index 91% rename from workflows/handle_data.nf rename to subworkflows/local/handle_data/main.nf index 6f01043..04417c9 100644 --- a/workflows/handle_data.nf +++ b/subworkflows/local/handle_data/main.nf @@ -4,11 +4,10 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { INTERPRET_JSON } from '../modules/local/interpret_json.nf' -include { SPLIT_CSV } from '../subworkflows/split_csv.nf' -include { TRANSFORM_CSV } from '../subworkflows/transform_csv.nf' -include { SHUFFLE_CSV } from '../subworkflows/shuffle_csv.nf' - +include { INTERPRET_JSON } from '../../../modules/local/interpret_json.nf' +include { SPLIT_CSV } from '../../../subworkflows/local/split_csv' +include { TRANSFORM_CSV } from '../../../subworkflows/local/transform_csv' +include { SHUFFLE_CSV } from '../../../subworkflows/local/shuffle_csv' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -25,8 +24,8 @@ workflow HANDLE_DATA { main: - - // print the message from the check_model subworkflow + + // print the message from the check_model subworkflow message_from_check.view() // put the files in channels @@ -54,7 +53,7 @@ workflow HANDLE_DATA { // run split with json that only contains experiment name and split information. It runs only the necessary times, all unique ways to split + default split (random split) or column split (already present in data). SPLIT_CSV( csv, split_json ) - // assign to each splitted data the associated ransform information based on the split_transform_key generated in the interpret step. + // assign to each splitted data the associated ransform information based on the split_transform_key generated in the interpret step. transform_split_tuple = transform_json.combine( SPLIT_CSV.out.split_data, by: 0 ) // launch the actual noise subworkflow @@ -65,7 +64,7 @@ workflow HANDLE_DATA { tmp = experiment_json.combine( TRANSFORM_CSV.out.transformed_data, by: 0 ).map{ it -> ["${it[6].name} - ${it[0]}", it[2], it[1], it[4]] } - + // Launch the shuffle, (always happening on default) and disjointed from noise. Data are taken from the no-split option of split module. Which means that is either randomly splitted with default values or using the column present in the data. // It can be still skipped but by default is run. shuffle is set to true in nextflow.config data = tmp @@ -78,11 +77,11 @@ workflow HANDLE_DATA { // merge output of shuffle to the output of noise data = tmp.concat( SHUFFLE_CSV.out.shuffle_data ) } - + emit: - data - + data + } diff --git a/workflows/handle_tune.nf b/subworkflows/local/handle_tune/main.nf similarity index 92% rename from workflows/handle_tune.nf rename to subworkflows/local/handle_tune/main.nf index 6914bc5..8908cff 100644 --- a/workflows/handle_tune.nf +++ b/subworkflows/local/handle_tune/main.nf @@ -4,8 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { TORCH_TUNE } from '../modules/local/torch_tune.nf' - +include { TORCH_TUNE } from '../../../modules/local/torch_tune.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -22,13 +21,13 @@ workflow HANDLE_TUNE { input_initial_weights main: - // put the files in channels, + // put the files in channels, model = Channel.fromPath( input_model ).first() // TODO implement a check and complain if more that one file is pased as model tune_config = Channel.fromPath( input_tune_config ) // assign a model and a TUNE_config to each data model_conf_pair = model.combine(tune_config) - model_conf_data = model_conf_pair.combine(data).map{ + model_conf_data = model_conf_pair.combine(data).map{ it -> [it[2], it[3], it[1], it[0], it[5], it[4]] } // just reordering according to the inputs of the launch_tuning.py @@ -53,12 +52,11 @@ workflow HANDLE_TUNE { tune_out = TORCH_TUNE.out.tune_specs model debug - -} +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ \ No newline at end of file +*/ diff --git a/subworkflows/shuffle_csv.nf b/subworkflows/local/shuffle_csv/main.nf similarity index 89% rename from subworkflows/shuffle_csv.nf rename to subworkflows/local/shuffle_csv/main.nf index c15091a..34748f0 100644 --- a/subworkflows/shuffle_csv.nf +++ b/subworkflows/local/shuffle_csv/main.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { STIMULUS_SHUFFLE_CSV } from '../modules/local/stimulus_shuffle_csv.nf' +include { STIMULUS_SHUFFLE_CSV } from '../../../modules/local/stimulus_shuffle_csv.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -16,10 +16,10 @@ workflow SHUFFLE_CSV { take: csv_json_pairs - + main: - + STIMULUS_SHUFFLE_CSV(csv_json_pairs) diff --git a/subworkflows/split_csv.nf b/subworkflows/local/split_csv/main.nf similarity index 92% rename from subworkflows/split_csv.nf rename to subworkflows/local/split_csv/main.nf index d3e1ef4..8bf6037 100644 --- a/subworkflows/split_csv.nf +++ b/subworkflows/local/split_csv/main.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { STIMULUS_SPLIT_CSV } from '../modules/local/stimulus_split_csv.nf' +include { STIMULUS_SPLIT_CSV } from '../../../modules/local/stimulus_split_csv.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/subworkflows/transform_csv.nf b/subworkflows/local/transform_csv/main.nf similarity index 92% rename from subworkflows/transform_csv.nf rename to subworkflows/local/transform_csv/main.nf index dd1f84f..14c0a0f 100644 --- a/subworkflows/transform_csv.nf +++ b/subworkflows/local/transform_csv/main.nf @@ -4,7 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { STIMULUS_TRANSFORM_CSV } from '../modules/local/stimulus_transform_csv.nf' +include { STIMULUS_TRANSFORM_CSV } from '../../../modules/local/stimulus_transform_csv.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -16,7 +16,7 @@ workflow TRANSFORM_CSV { take: csv_json_pairs - + main: // TODO add strategy for handling the launch of stimulus noiser as well as NF-core and other modules diff --git a/subworkflows/local/utils_nfcore_deepmodeloptim_pipeline/main.nf b/subworkflows/local/utils_nfcore_deepmodeloptim_pipeline/main.nf new file mode 100644 index 0000000..6a4e7b5 --- /dev/null +++ b/subworkflows/local/utils_nfcore_deepmodeloptim_pipeline/main.nf @@ -0,0 +1,228 @@ +// +// Subworkflow with functionality specific to the nf-core/deepmodeloptim pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + //input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + UTILS_NFSCHEMA_PLUGIN ( + workflow, + validate_params, + null + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Create channel from input file provided through params.input + // + + // TODO: Adequate it to our input if `input` is implemented + // Channel + // .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + // .map { + // meta, fastq_1, fastq_2 -> + // if (!fastq_2) { + // return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + // } else { + // return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + // } + // } + // .groupTuple() + // .map { samplesheet -> + // validateInputSamplesheet(samplesheet) + // } + // .map { + // meta, fastqs -> + // return [ meta, fastqs.flatten() ] + // } + // .set { ch_samplesheet } + + emit: + // samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + [] + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + + + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + + + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + def manifest_doi = meta.manifest_map.doi.tokenize(",") + manifest_doi.each { doi_ref -> + temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + } + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..0fcbf7b --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,124 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..a09572e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..5cb7baf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,462 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Handling multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    ${group}

    \n" + summary_section += "
    \n" + group_params + .keySet() + .sort() + .each { param -> + summary_section += "
    ${param}
    ${group_params.get(param) ?: 'N/A'}
    \n" + } + summary_section += "
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + } + mqc_report = mqc_report[0] + } + } + } + catch (Exception all) { + if (multiqc_report) { + log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { +new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception all) { + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 0000000..4994303 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 0000000..f7d9f02 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. +output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 0000000..842dc43 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = 1 + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = 1 + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 0000000..0907ac5 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..331e0d2 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/tests/handle_data.nf.test b/tests/handle_data.nf.test index b0eb9c5..2131c7c 100644 --- a/tests/handle_data.nf.test +++ b/tests/handle_data.nf.test @@ -9,10 +9,11 @@ nextflow_workflow { when { - // make so that nf-test will see a singularity cache dir where is usally set to, again interpretation of projectdir has to be done here and not rely on an external config. - params { - singularity_cache_dir = "${projectDir}/singularity_cache" - } + // TODO Check that continues working with $NXF_SINGULARITY_CACHEDIR + // // make so that nf-test will see a singularity cache dir where is usally set to, again interpretation of projectdir has to be done here and not rely on an external config. + // params { + // singularity_cache_dir = "${projectDir}/singularity_cache" + // } // repeating in input[0-1] the same content of params.csv and parma.exp_conf present in the test.config because nf-test does not interpret the ${projectDir} correctly otherwise workflow { diff --git a/tests/handle_tune.nf.test b/tests/handle_tune.nf.test index ea97be9..0195c38 100644 --- a/tests/handle_tune.nf.test +++ b/tests/handle_tune.nf.test @@ -20,16 +20,15 @@ nextflow_workflow { } when { - + // make so that nf-test will see a singularity cache dir where is usally set to, again interpretation of projectdir has to be done here and not rely on an external config. params { - singularity_cache_dir = "${projectDir}/singularity_cache" + // TODO Check that continues working with $NXF_SINGULARITY_CACHEDIR + // singularity_cache_dir = "${projectDir}/singularity_cache" debug_mode = true - max_gpus = 0 - } // repeating in input[0-1] the same content params.model of params.tune_conf present in the test.config because nf-test does not interpret the ${projectDir} correctly otherwise @@ -48,7 +47,7 @@ nextflow_workflow { // first chech that the model initialization is always identical // then check that the seeds used are the same through randomly picked numbers assertAll( - { assert workflow.success }, + { assert workflow.success }, { assert snapshot(workflow.out.debug).match() } ) } diff --git a/tests/nextflow.config b/tests/nextflow.config index f4281d5..f7b650f 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -33,4 +33,4 @@ manifest { timeline { enabled = false } report { enabled = false } trace { enabled = false } -dag { enabled = false } \ No newline at end of file +dag { enabled = false } diff --git a/tower.yml b/tower.yml new file mode 100644 index 0000000..c61323c --- /dev/null +++ b/tower.yml @@ -0,0 +1,3 @@ +reports: + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" diff --git a/workflows/deepmodeloptim.nf b/workflows/deepmodeloptim.nf new file mode 100644 index 0000000..1f8def8 --- /dev/null +++ b/workflows/deepmodeloptim.nf @@ -0,0 +1,92 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { paramsSummaryMap } from 'plugin/nf-schema' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_deepmodeloptim_pipeline' +include { CHECK_MODEL } from '../subworkflows/local/check_model' +include { HANDLE_DATA } from '../subworkflows/local/handle_data' +include { HANDLE_TUNE } from '../subworkflows/local/handle_tune' +include { HANDLE_ANALYSIS } from '../subworkflows/local/handle_analysis' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow DEEPMODELOPTIM { + + take: + // ch_samplesheet, // channel: samplesheet read in from --input + ch_csv + ch_exp_conf + ch_model + ch_tune_conf + ch_initial_weights + + main: + + ch_versions = Channel.empty() + + CHECK_MODEL ( + params.csv, + params.exp_conf, + params.model, + params.tune_conf, + params.initial_weights + ) + completion_message = CHECK_MODEL.out.completion_message + + HANDLE_DATA( + params.csv, + params.exp_conf, + completion_message + ) + prepared_data = HANDLE_DATA.out.data + //HANDLE_DATA.out.data.view() + + HANDLE_TUNE( + params.model, + params.tune_conf, + prepared_data, + params.initial_weights + ) + //HANDLE_TUNE.out.model.view() + //HANDLE_TUNE.out.tune_out.view() + + // this part works, but the docker container is not updated with matplotlib yet + HANDLE_ANALYSIS( + HANDLE_TUNE.out.tune_out, + HANDLE_TUNE.out.model + ) + + // + // Collate and save software versions + // + // TODO: collect software versions + // softwareVersionsToYAML(ch_versions) + // .collectFile( + // storeDir: "${params.outdir}/pipeline_info", + // name: 'nf_core_' + 'pipeline_software_' + '' + 'versions.yml', + // sort: true, + // newLine: true + // ).set { ch_collated_versions } + + + emit: + versions = ch_versions // channel: [ path(versions.yml) ] + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/