Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/py_nvd/_fingerprint.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"main.nf": "d3df999c77a6754811017c07fa446c551d8334e72822a6df1c5cdcacb4715ebb",
"nextflow.config": "8d1bbbd14e66c6813a75fce1e3aca4307704fa1e4cd6552fb2d653dd60f78d51"
"nextflow.config": "0b83a6d10e66f13e1821df96ec76849df034bb97c7758993a0c54cdaa3a38166"
}
14 changes: 13 additions & 1 deletion lib/py_nvd/cli/commands/preset.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,17 @@ def preset_register(
dedup: bool | None = typer.Option(
None,
"--dedup/--no-dedup",
help="Deduplicate reads",
help="Deduplicate reads (umbrella: enables both --dedup-seq and --dedup-pos)",
),
dedup_seq: bool | None = typer.Option(
None,
"--dedup-seq/--no-dedup-seq",
help="Sequence-based deduplication with clumpify",
),
dedup_pos: bool | None = typer.Option(
None,
"--dedup-pos/--no-dedup-pos",
help="Positional deduplication with samtools markdup",
),
trim_adapters: bool | None = typer.Option(
None,
Expand Down Expand Up @@ -225,6 +235,8 @@ def preset_register(
"entropy": entropy,
"preprocess": preprocess,
"dedup": dedup,
"dedup_seq": dedup_seq,
"dedup_pos": dedup_pos,
"trim_adapters": trim_adapters,
"scrub_host_reads": scrub_host_reads,
"filter_reads": filter_reads,
Expand Down
16 changes: 15 additions & 1 deletion lib/py_nvd/cli/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,19 @@ def run(
dedup: bool | None = typer.Option(
None,
"--dedup/--no-dedup",
help="Deduplicate reads (default: follows --preprocess)",
help="Deduplicate reads (umbrella: enables both --dedup-seq and --dedup-pos)",
rich_help_panel=PANEL_PREPROCESSING,
),
dedup_seq: bool | None = typer.Option(
None,
"--dedup-seq/--no-dedup-seq",
help="Sequence-based deduplication with clumpify (default: follows --dedup)",
rich_help_panel=PANEL_PREPROCESSING,
),
dedup_pos: bool | None = typer.Option(
None,
"--dedup-pos/--no-dedup-pos",
help="Positional deduplication with samtools markdup (default: follows --dedup)",
rich_help_panel=PANEL_PREPROCESSING,
),
trim_adapters: bool | None = typer.Option(
Expand Down Expand Up @@ -625,6 +637,8 @@ def run(
"preprocess": preprocess,
"merge_pairs": merge_pairs,
"dedup": dedup,
"dedup_seq": dedup_seq,
"dedup_pos": dedup_pos,
"trim_adapters": trim_adapters,
"scrub_host_reads": scrub_host_reads,
"filter_reads": filter_reads,
Expand Down
12 changes: 11 additions & 1 deletion lib/py_nvd/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,17 @@ class NvdParams(BaseModel):
)
dedup: bool | None = Field(
None,
description="Deduplicate reads",
description="Deduplicate reads (umbrella: enables both dedup_seq and dedup_pos)",
json_schema_extra={"category": "Preprocessing"},
)
dedup_seq: bool | None = Field(
None,
description="Sequence-based deduplication with clumpify (preprocessing)",
json_schema_extra={"category": "Preprocessing"},
)
dedup_pos: bool | None = Field(
None,
description="Positional deduplication with samtools markdup (after alignment)",
json_schema_extra={"category": "Preprocessing"},
)
trim_adapters: bool | None = Field(
Expand Down
4 changes: 3 additions & 1 deletion lib/py_nvd/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
SCHEMA_FILENAME = "nvd-params.latest.schema.json"

# GitHub raw URL for schema (fallback and for generated templates)
SCHEMA_URL = "https://raw.githubusercontent.com/dhoconno/nvd/main/schemas/nvd-params.v2.4.0.schema.json"
SCHEMA_URL = "https://raw.githubusercontent.com/dhoconno/nvd/main/schemas/nvd-params.v2.5.0.schema.json"


def _find_schema_path() -> Path:
Expand Down Expand Up @@ -281,6 +281,8 @@ def _yaml_analysis_section(
"preprocess",
"merge_pairs",
"dedup",
"dedup_seq",
"dedup_pos",
"trim_adapters",
"scrub_host_reads",
"filter_reads",
Expand Down
16 changes: 9 additions & 7 deletions modules/minimap2.nf
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
/*
* Map reads to contigs using minimap2, with optional duplicate marking.
* Map reads to contigs using minimap2, with optional positional duplicate marking.
*
* When params.dedup is true, the pipeline includes samtools collate/fixmate/markdup
* to identify and remove PCR/optical duplicates. This is recommended for amplicon
* or high-duplication libraries but adds computational overhead.
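* When positional dedup is enabled (params.dedup_pos, falling back to params.dedup
* and then params.preprocess when unset), the pipeline includes samtools
* collate/fixmate/markdup to identify and remove PCR/optical duplicates.
* This is recommended for amplicon or high-duplication libraries but adds
* computational overhead.
* NOTE: the fallback uses Groovy's Elvis operator (?:), so an explicit
* dedup_pos = false is treated like "unset" and still defers to dedup/preprocess.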
*
* When params.dedup is false, reads are simply filtered (unmapped removed) and
* coordinate-sorted, which is sufficient for many viral metagenomics applications.
* When positional dedup is disabled, reads are simply filtered (unmapped removed)
* and coordinate-sorted, which is sufficient for many viral metagenomics applications.
*
* Output is always a coordinate-sorted, indexed BAM file.
*/
Expand All @@ -30,7 +31,8 @@ process MAP_READS_TO_CONTIGS {
def preset = platform == 'ont' || platform == 'sra'
? "map-ont"
: "sr"
if (params.dedup) {
def should_dedup_pos = params.dedup_pos ?: params.dedup ?: params.preprocess
if (should_dedup_pos) {
"""
minimap2 -ax ${preset} -t ${task.cpus} ${contigs} ${reads} \\
| samtools view -b -F 4 \\
Expand Down
6 changes: 5 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,12 @@ params {
// whether to merge paired read mates based on overlaps between them
merge_pairs = null

// whether to deduplicate sequencing reads
// whether to deduplicate sequencing reads (umbrella: enables both dedup_seq and dedup_pos)
dedup = null
// sequence-based deduplication with clumpify (preprocessing)
dedup_seq = null
// positional deduplication with samtools markdup (after alignment);
// when left null, falls back to dedup, then preprocess
dedup_pos = null

// Adapter trimming (Illumina only)
trim_adapters = null
Expand Down
1 change: 1 addition & 0 deletions schemas/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ This directory contains JSON Schema definitions for the NVD pipeline.
|--------|-------------|
| `nvd-params.v2.3.0.schema.json` | Pipeline parameters schema (version 2.3.0) |
| `nvd-params.v2.4.0.schema.json` | Pipeline parameters schema (version 2.4.0) |
| `nvd-params.v2.5.0.schema.json` | Pipeline parameters schema (version 2.5.0) |
| `nvd-params.latest.schema.json` | Symlink to the current version |

## Usage
Expand Down
2 changes: 1 addition & 1 deletion schemas/nvd-params.latest.schema.json
Loading