diff --git a/CHANGELOG.md b/CHANGELOG.md
index d7eea135..e92aa439 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,8 +21,10 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c
- [#53](https://github.com/nf-core/seqinspector/pull/53) Add FastQ-Screen database multiplexing and limit scope of nf-test in CI.
- [#96](https://github.com/nf-core/seqinspector/pull/96) Added missing citations to citation tool
- [#103](https://github.com/nf-core/seqinspector/pull/103) Configure full-tests
+- [#109](https://github.com/nf-core/seqinspector/pull/109) Adds ToulligQC module for long read QC
- [#110](https://github.com/nf-core/seqinspector/pull/110) Update input schema to accept either tar file or directory as rundir, and fastq messages and patterns.
+
### `Fixed`
- [#71](https://github.com/nf-core/seqinspector/pull/71) FASTQSCREEN does not fail when multiple reads are provided.
diff --git a/CITATIONS.md b/CITATIONS.md
index 208cfa1e..b4779433 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -28,6 +28,8 @@
- [Seqtk](https://github.com/lh3/seqtk)
+- [ToulligQC](https://github.com/GenomiqueENS/toulligQC)
+
## Software packaging/containerisation tools
- [Anaconda](https://anaconda.com)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index e960d07e..40b5fdde 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -2,6 +2,7 @@ report_comment: >
This report has been generated by the nf-core/seqinspector
analysis pipeline. For information about how to interpret these results, please see the
documentation.
+ If ToulligQC was used, a separate report is available in the results folder.
report_section_order:
"nf-core-seqinspector-methods-description":
order: -1000
diff --git a/conf/modules.config b/conf/modules.config
index 5a0b7ccd..d5e8abb7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -35,6 +35,15 @@ process {
]
}
+ withName: 'TOULLIGQC' { // per-process settings applied to the TOULLIGQC process
+ ext.args = '' // no extra command-line arguments are appended to the toulligqc call by default
+ publishDir = [
+ path: { "${params.outdir}/toulligqc" }, // results are published under <outdir>/toulligqc
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename } // null for versions.yml so that file is not published
+ ]
+ }
+
withName: 'MULTIQC_GLOBAL' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
diff --git a/docs/output.md b/docs/output.md
index 3d3c3497..2da95399 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -80,6 +80,17 @@ The `.csv` is provided as a pipeline parameter `fastq_screen_references` and is
[SeqFu](https://telatin.github.io/seqfu2/) is general-purpose program to manipulate and parse information from FASTA/FASTQ files, supporting gzipped input files. Includes functions to interleave and de-interleave FASTQ files, to rename sequences and to count and print statistics on sequence lengths. In this pipeline, the `seqfu stats` module is used to produce general quality metrics statistics.
+### ToulligQC
+
+
+Output files
+
+- `toulligqc/`
+ - `*.data`: ToulligQC output text file containing log information and all analysis results
+ - `*.html`: ToulligQC html report file
+
+[ToulligQC](https://github.com/GenomiqueENS/toulligQC) is dedicated to the QC analyses of Oxford Nanopore runs. This software is written in Python and developed by the GenomiqueENS core facility of the Institute of Biology of the Ecole Normale Superieure (IBENS).
+
### MultiQC
nf-core/seqinspector will generate the following MultiQC reports:
diff --git a/modules.json b/modules.json
index 28c9108c..36305ddf 100644
--- a/modules.json
+++ b/modules.json
@@ -40,6 +40,11 @@
"branch": "master",
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
+ },
+ "toulligqc": {
+ "branch": "master",
+ "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
+ "installed_by": ["modules"]
}
}
},
diff --git a/modules/nf-core/toulligqc/environment.yml b/modules/nf-core/toulligqc/environment.yml
new file mode 100644
index 00000000..e1632a8b
--- /dev/null
+++ b/modules/nf-core/toulligqc/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::toulligqc=2.5.6
diff --git a/modules/nf-core/toulligqc/main.nf b/modules/nf-core/toulligqc/main.nf
new file mode 100644
index 00000000..71ced043
--- /dev/null
+++ b/modules/nf-core/toulligqc/main.nf
@@ -0,0 +1,63 @@
+process TOULLIGQC { // Runs toulligqc on one input file per sample and emits the files it writes under <prefix>/
+ label 'process_low'
+ tag "$meta.id"
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/toulligqc:2.5.6--pyhdfd78af_0':
+ 'biocontainers/toulligqc:2.5.6--pyhdfd78af_0' }"
+
+ input:
+ // meta: sample map (meta.id feeds the tag and the default prefix); ontfile: the single file handed to toulligqc
+ tuple val(meta), path(ontfile)
+
+
+ output:
+ tuple val(meta), path("*/*.data") , emit: report_data
+ tuple val(meta), path("*/*.html") , emit: report_html, optional: true // the only output declared optional
+ tuple val(meta), path("*/images/*.html") , emit: plots_html
+ tuple val(meta), path("*/images/plotly.min.js") , emit: plotly_js
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra CLI arguments supplied through ext.args, empty by default
+ def prefix = task.ext.prefix ?: "${meta.id}" // used as the toulligqc output directory name
+ // Pick the toulligqc input flag from the file extension; any other extension leaves input_file empty
+ def input_file = ("$ontfile".endsWith(".fastq") || "$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq") || "$ontfile".endsWith(".fq.gz")) ? "--fastq ${ontfile}" :
+ ("$ontfile".endsWith(".txt") || "$ontfile".endsWith(".txt.gz")) ? "--sequencing-summary-source ${ontfile}" :
+ ("$ontfile".endsWith(".bam")) ? "--bam ${ontfile}" : ''
+
+ """
+ toulligqc \\
+ $input_file \\
+ --output-directory ${prefix} \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ toulligqc: \$(toulligqc --version)
+ END_VERSIONS
+ """
+
+ stub: // creates empty placeholder outputs; no <prefix>/*.html report is touched (that output is optional)
+ def args = task.ext.args ?: '' // NOTE(review): defined but not referenced in the stub script below
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ mkdir ${prefix}
+ mkdir ${prefix}/images
+ touch ${prefix}/report.data
+ touch ${prefix}/images/Correlation_between_read_length_and_PHRED_score.html
+ touch ${prefix}/images/Distribution_of_read_lengths.html
+ touch ${prefix}/images/PHRED_score_density_distribution.html
+ touch ${prefix}/images/Read_count_histogram.html
+ touch ${prefix}/images/plotly.min.js
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ toulligqc: \$(toulligqc --version)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/toulligqc/meta.yml b/modules/nf-core/toulligqc/meta.yml
new file mode 100644
index 00000000..b269eccf
--- /dev/null
+++ b/modules/nf-core/toulligqc/meta.yml
@@ -0,0 +1,76 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "toulligqc"
+description: "A post sequencing QC tool for Oxford Nanopore sequencers"
+keywords:
+ - nanopore sequencing
+ - quality control
+ - genomics
+tools:
+ - "toulligqc":
+ description: "A post sequencing QC tool for Oxford Nanopore sequencers"
+ homepage: https://github.com/GenomiqueENS/toulligQC
+ documentation: https://github.com/GenomiqueENS/toulligQC
+ tool_dev_url: https://github.com/GenomiqueENS/toulligQC
+ licence: ["CECILL-2.1"]
+ identifier: biotools:ToulligQC
+
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - ontfile:
+ type: file
+ description: Input ONT file
+ pattern: "*.{fastq,fastq.gz,fq,fq.gz,txt,txt.gz,bam}"
+output:
+ - report_data:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*/*.data":
+ type: file
+ description: Report data emitted from toulligqc
+ pattern: "*.data"
+ - report_html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*/*.html":
+ type: file
+ description: Report data in html format
+ pattern: "*.html"
+ - plots_html:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*/images/*.html":
+ type: file
+ description: Plots emitted in html format
+ pattern: "*.html"
+ - plotly_js:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*/images/plotly.min.js":
+ type: file
+ description: Plots emitted from toulligqc
+ pattern: "plotly.min.js"
+ - versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@Salome-Brunon"
+maintainers:
+ - "@Salome-Brunon"
diff --git a/modules/nf-core/toulligqc/tests/main.nf.test b/modules/nf-core/toulligqc/tests/main.nf.test
new file mode 100644
index 00000000..5bbad941
--- /dev/null
+++ b/modules/nf-core/toulligqc/tests/main.nf.test
@@ -0,0 +1,125 @@
+nextflow_process { // nf-test suite for TOULLIGQC: sequencing summary, summary + barcodes, fastq, bam, and a stub run
+
+ name "Test Process TOULLIGQC"
+ script "../main.nf"
+ process "TOULLIGQC"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "toulligqc"
+
+ test("sarscov2 - nanopore sequencing_summary") { // sequencing summary (.txt) input, no test-specific config
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/sequencing_summary/test2.sequencing_summary.txt', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("toulligqc_versions_sequencing_summary") },
+ { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..74]).match() }, // only list indices 11..74 of the .data lines are snapshotted (presumably to skip run-specific lines - confirm)
+ { assert process.out.report_html[0][1] ==~ ".*/report.html"} // the emitted HTML report path must end in /report.html
+ )
+ }
+
+ }
+
+ test("sarscov2 - nanopore sequencing_summary + barcodes") { // same input as above, run with the barcoding arguments
+ config "./nextflow.config" // the tests/nextflow.config file, which sets --barcoding/--barcodes through ext.args
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/sequencing_summary/test2.sequencing_summary.txt', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("toulligqc_versions_sequencing_summary_barcodes") },
+ { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..74]).match() },
+ { assert process.out.report_html[0][1] ==~ ".*/report.html"}
+ )
+ }
+
+ }
+
+ test("sarscov2 - nanopore fastq") { // gzipped FASTQ input
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("toulligqc_versions_fastq") },
+ { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..67]).match() }, // shorter slice than the other tests: indices 11..67
+ { assert process.out.report_html[0][1] ==~ ".*/report.html"}
+ )
+ }
+
+ }
+
+ test("sarscov2 - nanopore bam") { // BAM input
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/bam/test.sorted.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("toulligqc_versions_bam") },
+ { assert snapshot(file(process.out.report_data.get(0).get(1)).readLines()[11..74]).match() },
+ { assert process.out.report_html[0][1] ==~ ".*/report.html"}
+ )
+ }
+
+ }
+ test("sarscov2 - nanopore bam - stub") { // stub run: only success and the versions snapshot are asserted
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/bam/test.sorted.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("toulligqc_versions_stub") }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/toulligqc/tests/main.nf.test.snap b/modules/nf-core/toulligqc/tests/main.nf.test.snap
new file mode 100644
index 00000000..025c3c2b
--- /dev/null
+++ b/modules/nf-core/toulligqc/tests/main.nf.test.snap
@@ -0,0 +1,319 @@
+{
+ "sarscov2 - nanopore sequencing_summary": {
+ "content": [
+ [
+ "sequencing.telemetry.extractor.software.analysis=1d_basecalling",
+ "basecaller.sequencing.summary.1d.extractor.read.count=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.count=100",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.count=0",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=1.0",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.0",
+ "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=100.0",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=0.0",
+ "basecaller.sequencing.summary.1d.extractor.yield=38253",
+ "basecaller.sequencing.summary.1d.extractor.n50=365",
+ "basecaller.sequencing.summary.1d.extractor.l50=67",
+ "basecaller.sequencing.summary.1d.extractor.run.time=29006.4915",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.count=85.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.mean=1.1764705882352942",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.std=0.4412231534591759",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.min=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.25%=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.50%=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.75%=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.max=3.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.3747982818722",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=382.53",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=219.3747982818722",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=271.75",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=305.5",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=nan",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.084663391113281",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.7714887857437134",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=7.7340922355651855",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.089608192443848",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=11.957954406738281",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=13.238139390945435",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=17.272123336791992",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=12.084663391113281",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.7714887857437134",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=7.7340922355651855",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.089608192443848",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=11.957954406738281",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=13.238139390945435",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=17.272123336791992",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.mean=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.std=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.min=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.25%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.50%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.75%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.max=nan"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:00:29.481355"
+ },
+ "toulligqc_versions_sequencing_summary_barcodes": {
+ "content": [
+ [
+ "versions.yml:md5,3be42e94e756b5a89167a891d287c538"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:00:41.607538"
+ },
+ "sarscov2 - nanopore sequencing_summary + barcodes": {
+ "content": [
+ [
+ "toulligqc.info.extractor.duration=0.0",
+ "sequencing.telemetry.extractor.software.analysis=1d_basecalling",
+ "basecaller.sequencing.summary.1d.extractor.read.count=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.count=100",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.count=0",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=1.0",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.0",
+ "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=100.0",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=0.0",
+ "basecaller.sequencing.summary.1d.extractor.yield=38253",
+ "basecaller.sequencing.summary.1d.extractor.n50=365",
+ "basecaller.sequencing.summary.1d.extractor.l50=67",
+ "basecaller.sequencing.summary.1d.extractor.run.time=29006.4915",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.count=85.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.mean=1.1764705882352942",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.std=0.4412231534591759",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.min=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.25%=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.50%=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.75%=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.max=3.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.3747982818722",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=382.53",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=219.3747982818722",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=271.75",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=305.5",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=nan",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.084663391113281",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.7714887857437134",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=7.7340922355651855",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.089608192443848",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=11.957954406738281",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=13.238139390945435",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=17.272123336791992",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=12.084663391113281",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.7714887857437134",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=7.7340922355651855",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.089608192443848",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=11.957954406738281",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=13.238139390945435",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=17.272123336791992",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.mean=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.std=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.min=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.25%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.50%=nan",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.75%=nan"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:00:41.63299"
+ },
+ "toulligqc_versions_bam": {
+ "content": [
+ [
+ "versions.yml:md5,3be42e94e756b5a89167a891d287c538"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:01:05.074867"
+ },
+ "sarscov2 - nanopore bam": {
+ "content": [
+ [
+ "sequencing.telemetry.extractor.run.id=Unknown",
+ "sequencing.telemetry.extractor.sample.id=Unknown",
+ "sequencing.telemetry.extractor.model.file=Unknown",
+ "sequencing.telemetry.extractor.software.name=minimap2",
+ "sequencing.telemetry.extractor.software.version=2.17-r974-dirty",
+ "sequencing.telemetry.extractor.flowcell.id=Unknown",
+ "sequencing.telemetry.extractor.basecalling.date=Unknown",
+ "sequencing.telemetry.extractor.pass.threshold.qscore=9",
+ "basecaller.sequencing.summary.1d.extractor.read.count=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.count=96",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.count=4",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=0.96",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.04",
+ "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=96.0",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=4.0",
+ "basecaller.sequencing.summary.1d.extractor.yield=38253",
+ "basecaller.sequencing.summary.1d.extractor.n50=365",
+ "basecaller.sequencing.summary.1d.extractor.l50=67",
+ "basecaller.sequencing.summary.1d.extractor.run.time=99.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.count=1.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.mean=100.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.std=nan",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.min=100.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.25%=100.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.50%=100.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.75%=100.0",
+ "basecaller.sequencing.summary.1d.extractor.channel.occupancy.statistics.max=100.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.37479828187222",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=385.6145833333333",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=222.9568931568171",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=272.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=312.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=308.5",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=79.87698875980074",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=261.0",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=267.75",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=272.5",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=313.25",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=428.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.82710075378418",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.9102991819381714",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=8.199999809265137",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.702500104904175",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=12.714999675750732",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=14.082499980926514",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=18.110000610351562",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=13.002917289733887",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.737709403038025",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=9.84000015258789",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.90749979019165",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=12.820000171661377",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=14.09500002861023",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=18.110000610351562"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:01:05.091838"
+ },
+ "toulligqc_versions_stub": {
+ "content": [
+ [
+ "versions.yml:md5,3be42e94e756b5a89167a891d287c538"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:01:12.876406"
+ },
+ "toulligqc_versions_fastq": {
+ "content": [
+ [
+ "versions.yml:md5,3be42e94e756b5a89167a891d287c538"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:00:52.810853"
+ },
+ "toulligqc_versions_sequencing_summary": {
+ "content": [
+ [
+ "versions.yml:md5,3be42e94e756b5a89167a891d287c538"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:00:29.458805"
+ },
+ "sarscov2 - nanopore fastq": {
+ "content": [
+ [
+ "sequencing.telemetry.extractor.run.id=Unknow",
+ "sequencing.telemetry.extractor.sample.id=Unknow",
+ "sequencing.telemetry.extractor.model.file=Unknow",
+ "basecaller.sequencing.summary.1d.extractor.read.count=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.count=96",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.count=4",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.ratio=0.96",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.ratio=0.04",
+ "basecaller.sequencing.summary.1d.extractor.read.count.frequency=100",
+ "basecaller.sequencing.summary.1d.extractor.read.pass.frequency=96.0",
+ "basecaller.sequencing.summary.1d.extractor.read.fail.frequency=4.0",
+ "basecaller.sequencing.summary.1d.extractor.yield=38253",
+ "basecaller.sequencing.summary.1d.extractor.n50=365",
+ "basecaller.sequencing.summary.1d.extractor.l50=67",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.count=100.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.mean=382.53",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.std=219.37479828187222",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.25%=271.75",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.50%=305.5",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.mean=385.6145833333333",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.std=222.9568931568171",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.min=228.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.25%=272.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.50%=312.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.75%=418.0",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.sequence.length.max=1664.0",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.mean=308.5",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.std=79.87698875980074",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.min=261.0",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.25%=267.75",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.50%=272.5",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.75%=313.25",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.sequence.length.max=428.0",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.mean=12.82710075378418",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.std=1.9102991819381714",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.min=8.199999809265137",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.25%=11.702500104904175",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.50%=12.714999675750732",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.75%=14.082499980926514",
+ "basecaller.sequencing.summary.1d.extractor.all.read.qscore.max=18.110000610351562",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.mean=13.002917289733887",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.std=1.737709403038025",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.min=9.84000015258789",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.25%=11.90749979019165",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.50%=12.820000171661377",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.75%=14.09500002861023",
+ "basecaller.sequencing.summary.1d.extractor.pass.reads.mean.qscore.max=18.110000610351562",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.mean=8.607500076293945",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.std=0.27219802141189575",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.min=8.199999809265137",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.25%=8.59000015258789",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.50%=8.735000133514404",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.75%=8.752500057220459",
+ "basecaller.sequencing.summary.1d.extractor.fail.reads.mean.qscore.max=8.760000228881836"
+ ]
+ ],
+ "timestamp": "2024-05-02T16:00:52.831534"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/toulligqc/tests/nextflow.config b/modules/nf-core/toulligqc/tests/nextflow.config
new file mode 100644
index 00000000..3e494e84
--- /dev/null
+++ b/modules/nf-core/toulligqc/tests/nextflow.config
@@ -0,0 +1,9 @@
+process {
+
+ withName: TOULLIGQC {
+ // Optional barcoding: pass --barcoding together with --barcodes
+ // --barcodes takes a comma-separated barcode list with no spaces (e.g. BC05,RB09,NB01,barcode10)
+ ext.args = '--barcoding --barcodes barcode01,barcode02,barcode03,barcode04,barcode05,barcode06,barcode07,barcode08,barcode09,barcode10,barcode11,barcode12'
+ }
+
+}
diff --git a/modules/nf-core/toulligqc/tests/tags.yml b/modules/nf-core/toulligqc/tests/tags.yml
new file mode 100644
index 00000000..8814a4d2
--- /dev/null
+++ b/modules/nf-core/toulligqc/tests/tags.yml
@@ -0,0 +1,2 @@
+toulligqc:
+ - "modules/nf-core/toulligqc/**"
diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf
index 22d37c30..f9a77e8b 100644
--- a/workflows/seqinspector.nf
+++ b/workflows/seqinspector.nf
@@ -10,6 +10,7 @@ include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/sample/
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { SEQFU_STATS } from '../modules/nf-core/seqfu/stats'
include { FASTQSCREEN_FASTQSCREEN } from '../modules/nf-core/fastqscreen/fastqscreen/main'
+include { TOULLIGQC } from '../modules/nf-core/toulligqc/main'
include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main'
include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main'
@@ -106,6 +107,20 @@ workflow SEQINSPECTOR {
ch_versions = ch_versions.mix(FASTQSCREEN_FASTQSCREEN.out.versions.first())
}
+ //
+ // MODULE: Run ToulligQC
+ //
+
+ // This provides useful stats of long reads
+
+ if (!("toulligqc" in skip_tools)) { // skipped when "toulligqc" is listed in skip_tools
+ TOULLIGQC (
+ ch_samplesheet
+ )
+ ch_multiqc_files = ch_multiqc_files.mix(TOULLIGQC.out.report_data) // mix() returns a new channel; re-assign it, otherwise the report data is silently dropped
+ ch_versions = ch_versions.mix(TOULLIGQC.out.versions.first()) // versions.yml is identical for every task, so the first one is enough
+ }
+
//
// Collate and save software versions
//