From 54e921e3bd2d079dcfc567d39e9f5cbeb888c4f4 Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Fri, 13 Feb 2026 15:44:57 +0100 Subject: [PATCH 1/7] addess review comments --- README.md | 4 ++++ docs/usage.md | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index a249295e..a416c448 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ It can perform subsampling, quality assessment, duplication level analysis, and The pipeline generates detailed MultiQC reports with flexible output options, ranging from individual sample reports to project-wide summaries, making it particularly useful for sequencing core facilities and research groups with access to sequencing instruments. If provided, nf-core/seqinspector can also parse statistics from an Illumina run folder directory into the final MultiQC reports. +### Compatibility between tools and data type + | Tool Type | Tool Name | Tool Description | Compatibility with Data | Dependencies | Default tool | @@ -40,6 +42,8 @@ If provided, nf-core/seqinspector can also parse statistics from an Illumina run | `QC` | [`Picard_collecthsmetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard) | Collect alignment QC metrics of hybrid-selection data. | [RNA, DNA] | [Bwamem2, SAMtools, `--fasta`, `--run_picard_collecths_metrics`, `--bait_intervals`, `--target_intervals` (`--ref_dict`)] | no | | `Reporting` | [`MultiQC`](http://multiqc.info/) | Present QC for raw reads | [RNA, DNA, synthetic] | [N/A] | yes | +### Workflow diagram + diff --git a/docs/usage.md b/docs/usage.md index e7ddeb0c..e419b7ab 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,13 +6,18 @@ ### General points -The nf-core/seqinspector pipeline is a general QC pipeline for sequencing data. The current version only supports data in fastq format. 
-The pipeline is meant to include a large amount of possible QC tools to chose from, but not all of them may be relevant to your data. As such we highly recommend to familiarize yourself with the different QC tools available and to remove any QC tool you would like to exclude with the `--skip-tools` command line parameter. For repeated use we suggest to create a params file containing the `--skip-tools` parameters (for details see the "Running the pipeline" section). -Be aware that some tools are skipped by default and will need to be included in the list of skipped tools when curating your own list. To identify defaults included or excluded please check out the overview table in the Introduction. +The nf-core/seqinspector pipeline is a general QC pipeline for sequencing data. +The current version only supports data in fastq format. +The pipeline is meant to include a large number of possible QC tools to choose from, but not all of them may be relevant to your data. +As such we highly recommend to familiarize yourself with the different QC tools available and to remove any QC tool you would like to exclude with the `--skip-tools` command line parameter. +For repeated use we suggest to create a params file containing the `--skip-tools` parameters (for details see the "Running the pipeline" section). +Be aware that some tools are skipped by default and will need to be included in the list of skipped tools when curating your own list. +To identify defaults included or excluded please check out [the overview compatibility between tools and data type table](https://nf-co.re/seqinspector/#compatibility-between-tools-and-data-type). ### What nf-core/seqinspector is not for -The results of the nf-core/seqinspector pipeline are not meant to be used for any downstream analysis, but are exclusively for QC purposes. Even tools that may be used in other pipelines as a starting point for analysis are run in a QC perspective, most likely with a downsampled input. 
+The results of the nf-core/seqinspector pipeline are not meant to be used for any downstream analysis, but are exclusively for QC purposes. +Even tools that may be used in other pipelines as a starting point for analysis are run from a QC perspective, most likely with a downsampled input. ## Samplesheet input @@ -26,7 +31,7 @@ You will need to create a samplesheet with information about the samples/fastq f The following simple run dir structure... -``` +```bash run_dir ├── sample1_lane1_group1_r1.fq.gz ├── sample2_lane1_group1_r1.fq.gz @@ -42,7 +47,6 @@ sample1 path/to/run_dir/sample1_lane1_group1_r1.fq.gz path/to/run_dir pr sample2 path/to/run_dir/sample2_lane1_group1_r1.fq.gz path/to/run_dir project1:group1 sample3 path/to/run_dir/sample3_lane2_group2_r1.fq.gz path/to/run_dir project1:group2 sample4 path/to/run_dir/sample4_lane2_group3_r1.fq.gz path/to/run_dir control - ``` | Column | Description | @@ -100,17 +104,29 @@ genome: 'GRCh37' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). -Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. Both absolute numbers (e.g 100) and relative numbers (e.g 0.25) can be specified. +### Sample size selection + +Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. +Both absolute numbers (e.g. 100) and relative numbers (e.g. 0.25) can be specified. ```bash nextflow run nf-core/seqinspector --input ./samplesheet.csv --outdir ./results --sample_size 1000000 -profile docker ``` +### Hybrid-selection QC metrics + +The pipeline supports hybrid-selection (HS) QC metrics collection. +Use `--run_picard_collecthsmetrics true` to run the QC tool [picard CollectHSmetrics](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard). +This tool is otherwise not run by default. + ### Skipping tools -Some tools might not be compatible with your data. 
In this case you can skip them by providing a comma-separated list of tools to be skipped with the `--skip_tools` parameter. +Some tools might not be compatible with your data. +In this case you can skip them by providing a comma-separated list of tools to be skipped with the `--skip_tools` parameter. -In case you want to make this more permanent, it is recommended to specify this in a params file, or even in your own nextflow configuration file. The nextflow configuration file can also be use to customise tool arguments. See official [nexflow](https://www.nextflow.io/docs/latest/config.html) and [nf-core](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) documentation for further details. +In case you want to make this more permanent, it is recommended to specify this in your own nextflow configuration file. +The nextflow configuration file can also be use to customise tool arguments. +See official [nexflow](https://www.nextflow.io/docs/latest/config.html) and [nf-core](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) documentation for further details. ### Updating the pipeline @@ -170,7 +186,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) - `wave` - - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. 
@@ -229,7 +245,3 @@ We recommend adding the following line to your environment to limit this (typica ```bash NXF_OPTS='-Xms1g -Xmx4g' ``` - -## Hybrid-selection QC metrics - -The pipeline supports hybrid-selection (HS) QC metrics collection . Use `--run_picard_collecthsmetrics true` to run the QC tool [picard CollectHSmetrics](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard). This tool is otherwise not run by default. From bd25fed29b226846e634018b6ec1df0cb482cc8e Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Fri, 13 Feb 2026 15:48:37 +0100 Subject: [PATCH 2/7] Update usage.md --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index e419b7ab..08f6f70e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,7 +12,7 @@ The pipeline is meant to include a large amount of possible QC tools to chose fr As such we highly recommend to familiarize yourself with the different QC tools available and to remove any QC tool you would like to exclude with the `--skip-tools` command line parameter. For repeated use we suggest to create a params file containing the `--skip-tools` parameters (for details see the "Running the pipeline" section). Be aware that some tools are skipped by default and will need to be included in the list of skipped tools when curating your own list. -To identify defaults included or excluded please check out [the overview compatibility between tools and data type table](https://nf-co.re/seqinspector/#compatibility-between-tools-and-data-type). +To identify defaults included or excluded please check out [the overview compatibility between tools and data type table](../#compatibility-between-tools-and-data-type). 
### What nf-core/seqinspector is not for From deb70d4a2ea7e6d222793616830a1e53a4b3ef17 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Fri, 13 Feb 2026 15:49:53 +0100 Subject: [PATCH 3/7] Apply suggestion from @maxulysse --- docs/usage.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 08f6f70e..a58ba6af 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -124,7 +124,6 @@ This tool is otherwise not run by default. Some tools might not be compatible with your data. In this case you can skip them by providing a comma-separated list of tools to be skipped with the `--skip_tools` parameter. -In case you want to make this more permanent, it is recommended to specify this in your own nextflow configuration file. The nextflow configuration file can also be use to customise tool arguments. See official [nexflow](https://www.nextflow.io/docs/latest/config.html) and [nf-core](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) documentation for further details. From ea4a4196bd48501f37493dc59a7e7f14f59593d9 Mon Sep 17 00:00:00 2001 From: "Maxime U. 
Garcia" Date: Fri, 13 Feb 2026 16:03:36 +0100 Subject: [PATCH 4/7] fix warning message about tag collision --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 37ec91ee..00ad7aa0 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -143,7 +143,9 @@ workflow PIPELINE_INITIALISATION { .map { tag_name -> [tag_name.toLowerCase(), tag_name] } .groupTuple() .map { _tag_lowercase, tags -> - assert tags.size() == 1 : "Tag name collision: " + tags.join(", ") + if (tags.size() > 1) { + log.warn("Tag name collision: " + tags.join(", ")) + } } emit: From 3e89d8d3d77739e95b52d2d08d3e1b0c9ba77c80 Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Fri, 13 Feb 2026 16:05:27 +0100 Subject: [PATCH 5/7] CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6015912..f7c7ad46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#169](https://github.com/nf-core/seqinspector/pull/169) Rescue missing versions from PREPARE_GENOME subworkflow - [#171](https://github.com/nf-core/seqinspector/pull/171) Rescue number of tasks in the pipeline level tests - [#172](https://github.com/nf-core/seqinspector/pull/172) More complete conda environment for rundir parser +- [#173](https://github.com/nf-core/seqinspector/pull/173) Fix warning message for tag name collision ### `Changed` @@ -72,6 +73,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c - [#164](https://github.com/nf-core/seqinspector/pull/164) Refactor local subworkflow and pipeline tests - [#168](https://github.com/nf-core/seqinspector/pull/168) 
Adhere to strict syntax - [#169](https://github.com/nf-core/seqinspector/pull/169) Prepare release 1.0.0 +- [#173](https://github.com/nf-core/seqinspector/pull/173) Improve documentation ### `Dependencies` From 971c991f760b39a172706c4fe047f4088676fbf2 Mon Sep 17 00:00:00 2001 From: Maxime U Garcia Date: Fri, 13 Feb 2026 16:19:36 +0100 Subject: [PATCH 6/7] Apply suggestion from @maxulysse --- subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf index 00ad7aa0..14748673 100644 --- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf @@ -144,7 +144,7 @@ workflow PIPELINE_INITIALISATION { .groupTuple() .map { _tag_lowercase, tags -> if (tags.size() > 1) { - log.warn("Tag name collision: " + tags.join(", ")) + log.warn("Tag name collision: " + tags) } } From c719d44bd291cc47f999effecc6b2212d76daebf Mon Sep 17 00:00:00 2001 From: "Maxime U. 
Garcia" Date: Mon, 16 Feb 2026 09:55:33 +0100 Subject: [PATCH 7/7] update contributor --- README.md | 5 ++-- nextflow.config | 4 ++- ro-crate-metadata.json | 58 +++++++++++++++++++++--------------------- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index a416c448..7ea39662 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,9 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/seqinspector was originally written by [@agrima2010](https://github.com/agrima2010), [@Aratz](https://github.com/Aratz), [@FranBonath](https://github.com/FranBonath), [@kedhammar](https://github.com/kedhammar), and [@MatthiasZepper](https://github.com/MatthiasZepper) from the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/) and [Clinical Genomics Stockholm](https://clinical.scilifelab.se/). +nf-core/seqinspector was originally written by [@agrima2010](https://github.com/agrima2010), [@Aratz](https://github.com/Aratz), [@FranBonath](https://github.com/FranBonath), [@kedhammar](https://github.com/kedhammar), and [@MatthiasZepper](https://github.com/MatthiasZepper) from the Swedish [National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/) and [Clinical Genomics Stockholm](https://clinical.scilifelab.se/). 
+ +Maintenance is now lead by Maxime U Garcia ([National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/)) We thank the following people for their extensive assistance in the development of this pipeline: @@ -104,7 +106,6 @@ We thank the following people for their extensive assistance in the development - [@kjellinjonas](https://github.com/kjellinjonas) - [@mahesh-panchal](https://github.com/mahesh-panchal) - [@matrulda](https://github.com/matrulda) -- [@maxulysse](https://github.com/maxulysse) - [@mirpedrol](https://github.com/mirpedrol) - [@nggvs](https://github.com/nggvs) - [@nkongenelly](https://github.com/nkongenelly) diff --git a/nextflow.config b/nextflow.config index 3df58bb6..4f3dd01a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -256,6 +256,7 @@ manifest { affiliation: 'Pixelgen Technologies', github: 'Aratz', contribution: ['author'], + orcid: '0000-0003-2702-1103' ], [ name: 'Alfred Kedhammar', @@ -274,7 +275,7 @@ manifest { name: 'Maxime U Garcia', affiliation: 'National Genomics Infrastructure', github: 'maxulysse', - contribution: ['maintainer'], + contribution: ['contributor', 'maintainer'], orcid: '0000-0003-2827-9261', ], [ @@ -307,6 +308,7 @@ manifest { affiliation: 'National Bioinformatics Infrastructure Sweden', github: 'mahesh-panchal', contribution: ['contributor'], + orcid: '0000-0003-1675-0677' ], [ name: 'Ramprasad Neethiraj', diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 242fa244..6c0c4e85 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "Stable", - "datePublished": "2026-02-10T15:24:32+00:00", - "description": "

\n \n \n \"nf-core/seqinspector\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/seqinspector)\n[![GitHub Actions CI Status](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/seqinspector/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/seqinspector)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23seqinspector-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/seqinspector)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/seqinspector** is a bioinformatics pipeline that processes raw sequence data (FASTQ) to provide comprehensive quality control.\nIt can perform subsampling, quality assessment, duplication level analysis, and complexity evaluation on a per-sample basis, while also detecting adapter content, technical artifacts, and common biological contaminants.\nThe pipeline generates detailed MultiQC reports with flexible output options, ranging from individual sample reports to project-wide summaries, making it particularly useful for sequencing core facilities and research groups with access to sequencing instruments.\nIf provided, nf-core/seqinspector can also parse statistics from an Illumina run folder directory into the final MultiQC reports.\n\n\n\n| Tool Type | Tool Name | Tool Description | Compatibility with Data | Dependencies | Default tool |\n| ------------------- | ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------ |\n| `Subsampling` | [`Seqtk`](https://github.com/lh3/seqtk) | Global subsampling of reads. 
Only performs subsampling if `--sample_size` parameter is given. | [RNA, DNA, synthetic] | [N/A] | no |\n| `Indexing, Mapping` | [`Bwamem2`](https://github.com/bwa-mem2/bwa-mem2) | Align reads to reference | [RNA, DNA] | [N/A] | yes |\n| `Indexing` | [`SAMtools`](http://github.com/samtools) | Index aligned BAM files, create FASTA index | [DNA] | [N/A] | yes |\n| `QC` | [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | Read QC | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`FastqScreen`](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) | Basic contamination detection | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`SeqFu Stats`](https://github.com/telatin/seqfu2) | Sequence statistics | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`Picard collect multiple metrics`](https://broadinstitute.github.io/picard/picard-metric-definitions.html) | Collect multiple QC metrics | [RNA, DNA] | [Bwamem2, SAMtools, `--genome`] | yes |\n| `QC` | [`Picard_collecthsmetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard) | Collect alignment QC metrics of hybrid-selection data. | [RNA, DNA] | [Bwamem2, SAMtools, `--fasta`, `--run_picard_collecths_metrics`, `--bait_intervals`, `--target_intervals` (`--ref_dict`)] | no |\n| `Reporting` | [`MultiQC`](http://multiqc.info/) | Present QC for raw reads | [RNA, DNA, synthetic] | [N/A] | yes |\n\n\n \n \n \"Fallback\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,rundir,tags\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,lane1:project5:group2\n```\n\nEach row represents a fastq file (single-end with only `fastq_1`) or a pair of fastq files (paired end with `fastq_1` and `fastq_2`).\n`rundir` is the path to the runfolder.\n`tags` is a colon-separated list of tags that will be added to the MultiQC report for this `sample`.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/seqinspector \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/seqinspector/usage) and the [parameter documentation](https://nf-co.re/seqinspector/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/seqinspector/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/seqinspector/output).\n\n## Credits\n\nnf-core/seqinspector was originally written by [@agrima2010](https://github.com/agrima2010), [@Aratz](https://github.com/Aratz), [@FranBonath](https://github.com/FranBonath), [@kedhammar](https://github.com/kedhammar), and 
[@MatthiasZepper](https://github.com/MatthiasZepper) from the Swedish [@NationalGenomicsInfrastructure](https://github.com/NationalGenomicsInfrastructure/) and [Clinical Genomics Stockholm](https://clinical.scilifelab.se/).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [@adamrtalbot](https://github.com/adamrtalbot)\n- [@alneberg](https://github.com/alneberg)\n- [@beatrizsavinhas](https://github.com/beatrizsavinhas)\n- [@ctuni](https://github.com/ctuni)\n- [@edmundmiller](https://github.com/edmundmiller)\n- [@EliottBo](https://github.com/EliottBo)\n- [@KarNair](https://github.com/KarNair)\n- [@kjellinjonas](https://github.com/kjellinjonas)\n- [@mahesh-panchal](https://github.com/mahesh-panchal)\n- [@matrulda](https://github.com/matrulda)\n- [@maxulysse](https://github.com/maxulysse)\n- [@mirpedrol](https://github.com/mirpedrol)\n- [@nggvs](https://github.com/nggvs)\n- [@nkongenelly](https://github.com/nkongenelly)\n- [@Patricie34](https://github.com/Patricie34)\n- [@pontushojer](https://github.com/pontushojer)\n- [@ramprasadn](https://github.com/ramprasadn)\n- [@rannick](https://github.com/rannick)\n- [@torigiffin](https://github.com/torigiffin)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#seqinspector` channel](https://nfcore.slack.com/channels/seqinspector) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, 
Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2026-02-16T08:55:13+00:00", + "description": "

\n \n \n \"nf-core/seqinspector\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/seqinspector)\n[![GitHub Actions CI Status](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/seqinspector/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/seqinspector/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/seqinspector)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23seqinspector-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/seqinspector)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/seqinspector** is a bioinformatics pipeline that processes raw sequence data (FASTQ) to provide comprehensive quality control.\nIt can perform subsampling, quality assessment, duplication level analysis, and complexity evaluation on a per-sample basis, while also detecting adapter content, technical artifacts, and common biological contaminants.\nThe pipeline generates detailed MultiQC reports with flexible output options, ranging from individual sample reports to project-wide summaries, making it particularly useful for sequencing core facilities and research groups with access to sequencing instruments.\nIf provided, nf-core/seqinspector can also parse statistics from an Illumina run folder directory into the final MultiQC reports.\n\n### Compatibility between tools and data type\n\n\n\n| Tool Type | Tool Name | Tool Description | Compatibility with Data | Dependencies | Default tool |\n| ------------------- | ------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------ |\n| `Subsampling` | [`Seqtk`](https://github.com/lh3/seqtk) | Global subsampling 
of reads. Only performs subsampling if `--sample_size` parameter is given. | [RNA, DNA, synthetic] | [N/A] | no |\n| `Indexing, Mapping` | [`Bwamem2`](https://github.com/bwa-mem2/bwa-mem2) | Align reads to reference | [RNA, DNA] | [N/A] | yes |\n| `Indexing` | [`SAMtools`](http://github.com/samtools) | Index aligned BAM files, create FASTA index | [DNA] | [N/A] | yes |\n| `QC` | [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) | Read QC | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`FastqScreen`](https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/) | Basic contamination detection | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`SeqFu Stats`](https://github.com/telatin/seqfu2) | Sequence statistics | [RNA, DNA] | [N/A] | yes |\n| `QC` | [`Picard collect multiple metrics`](https://broadinstitute.github.io/picard/picard-metric-definitions.html) | Collect multiple QC metrics | [RNA, DNA] | [Bwamem2, SAMtools, `--genome`] | yes |\n| `QC` | [`Picard_collecthsmetrics`](https://gatk.broadinstitute.org/hc/en-us/articles/360036856051-CollectHsMetrics-Picard) | Collect alignment QC metrics of hybrid-selection data. | [RNA, DNA] | [Bwamem2, SAMtools, `--fasta`, `--run_picard_collecths_metrics`, `--bait_intervals`, `--target_intervals` (`--ref_dict`)] | no |\n| `Reporting` | [`MultiQC`](http://multiqc.info/) | Present QC for raw reads | [RNA, DNA, synthetic] | [N/A] | yes |\n\n### Workflow diagram\n\n\n \n \n \"Fallback\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fastq_1,fastq_2,rundir,tags\nCONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,200624_A00834_0183_BHMTFYDRXX,lane1:project5:group2\n```\n\nEach row represents a fastq file (single-end with only `fastq_1`) or a pair of fastq files (paired end with `fastq_1` and `fastq_2`).\n`rundir` is the path to the runfolder.\n`tags` is a colon-separated list of tags that will be added to the MultiQC report for this `sample`.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/seqinspector \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/seqinspector/usage) and the [parameter documentation](https://nf-co.re/seqinspector/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/seqinspector/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/seqinspector/output).\n\n## Credits\n\nnf-core/seqinspector was originally written by [@agrima2010](https://github.com/agrima2010), [@Aratz](https://github.com/Aratz), [@FranBonath](https://github.com/FranBonath), [@kedhammar](https://github.com/kedhammar), and 
[@MatthiasZepper](https://github.com/MatthiasZepper) from the Swedish [National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/) and [Clinical Genomics Stockholm](https://clinical.scilifelab.se/).\n\nMaintenance is now led by Maxime U Garcia ([National Genomics Infrastructure](https://github.com/NationalGenomicsInfrastructure/)).\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [@adamrtalbot](https://github.com/adamrtalbot)\n- [@alneberg](https://github.com/alneberg)\n- [@beatrizsavinhas](https://github.com/beatrizsavinhas)\n- [@ctuni](https://github.com/ctuni)\n- [@edmundmiller](https://github.com/edmundmiller)\n- [@EliottBo](https://github.com/EliottBo)\n- [@KarNair](https://github.com/KarNair)\n- [@kjellinjonas](https://github.com/kjellinjonas)\n- [@mahesh-panchal](https://github.com/mahesh-panchal)\n- [@matrulda](https://github.com/matrulda)\n- [@mirpedrol](https://github.com/mirpedrol)\n- [@nggvs](https://github.com/nggvs)\n- [@nkongenelly](https://github.com/nkongenelly)\n- [@Patricie34](https://github.com/Patricie34)\n- [@pontushojer](https://github.com/pontushojer)\n- [@ramprasadn](https://github.com/ramprasadn)\n- [@rannick](https://github.com/rannick)\n- [@torigiffin](https://github.com/torigiffin)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#seqinspector` channel](https://nfcore.slack.com/channels/seqinspector) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander 
Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -102,7 +102,7 @@ }, "mentions": [ { - "@id": "#eb65e4a1-3330-4f79-98cb-e1b2a7f86b81" + "@id": "#08014bc0-7c74-49f0-a201-fcc18504ce2b" } ], "name": "nf-core/seqinspector" @@ -131,10 +131,7 @@ ], "creator": [ { - "@id": "https://orcid.org/0009-0004-5536-7210" - }, - { - "@id": "https://orcid.org/0000-0003-1675-0677" + "@id": "#max.u.garcia@gmail.com" }, { "@id": "https://orcid.org/0000-0003-2702-1103" @@ -143,11 +140,14 @@ "@id": "#89784800+kedhammar@users.noreply.github.com" }, { - "@id": "#max.u.garcia@gmail.com" + "@id": "https://orcid.org/0000-0003-1675-0677" + }, + { + "@id": "https://orcid.org/0009-0004-5536-7210" } ], "dateCreated": "", - "dateModified": "2026-02-10T16:24:32Z", + "dateModified": "2026-02-16T09:55:13Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -160,10 +160,7 @@ ], "maintainer": [ { - "@id": "https://orcid.org/0009-0004-5536-7210" - }, - { - "@id": "https://orcid.org/0000-0003-1675-0677" + "@id": "#max.u.garcia@gmail.com" }, { "@id": "https://orcid.org/0000-0003-2702-1103" @@ -172,7 +169,10 @@ "@id": "#89784800+kedhammar@users.noreply.github.com" }, { - "@id": "#max.u.garcia@gmail.com" + "@id": "https://orcid.org/0000-0003-1675-0677" + }, + { + "@id": "https://orcid.org/0009-0004-5536-7210" } ], "name": [ @@ -205,11 +205,11 @@ "version": "!>=25.04.0" }, { - "@id": "#eb65e4a1-3330-4f79-98cb-e1b2a7f86b81", + "@id": "#08014bc0-7c74-49f0-a201-fcc18504ce2b", "@type": "TestSuite", "instance": [ { - "@id": "#58467589-6ef2-4f18-8ebc-b5ecd9967ecd" + "@id": "#c3eeced1-9e95-4a6c-932f-c4ceae1158d2" } ], "mainEntity": { @@ -218,7 +218,7 @@ "name": "Test suite for nf-core/seqinspector" }, { - "@id": 
"#58467589-6ef2-4f18-8ebc-b5ecd9967ecd", + "@id": "#c3eeced1-9e95-4a6c-932f-c4ceae1158d2", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/seqinspector", "resource": "repos/nf-core/seqinspector/actions/workflows/nf-test.yml", @@ -352,16 +352,10 @@ "url": "https://nf-co.re/" }, { - "@id": "https://orcid.org/0009-0004-5536-7210", - "@type": "Person", - "email": "163723002+agrima2010@users.noreply.github.com", - "name": "Agrima Bhatt" - }, - { - "@id": "https://orcid.org/0000-0003-1675-0677", + "@id": "#max.u.garcia@gmail.com", "@type": "Person", - "email": "mahesh.binzer-panchal@nbis.se", - "name": "Mahesh Binzer-Panchal" + "email": "max.u.garcia@gmail.com", + "name": "Maxime U Garcia" }, { "@id": "https://orcid.org/0000-0003-2702-1103", @@ -376,10 +370,16 @@ "name": "Alfred Kedhammar" }, { - "@id": "#max.u.garcia@gmail.com", + "@id": "https://orcid.org/0000-0003-1675-0677", "@type": "Person", - "email": "max.u.garcia@gmail.com", - "name": "Maxime U Garcia" + "email": "mahesh.binzer-panchal@nbis.se", + "name": "Mahesh Binzer-Panchal" + }, + { + "@id": "https://orcid.org/0009-0004-5536-7210", + "@type": "Person", + "email": "163723002+agrima2010@users.noreply.github.com", + "name": "Agrima Bhatt" } ] } \ No newline at end of file