dholab · nrminor · Feb 17, 2026 · Feb 13, 2026 · Feb 12, 2026 · Feb 13, 2026
diff --git a/lib/py_nvd/_fingerprint.json b/lib/py_nvd/_fingerprint.json
@@ -1,4 +1,4 @@
 {
   "main.nf": "d3df999c77a6754811017c07fa446c551d8334e72822a6df1c5cdcacb4715ebb",
-  "nextflow.config": "8d1bbbd14e66c6813a75fce1e3aca4307704fa1e4cd6552fb2d653dd60f78d51"
+  "nextflow.config": "0b83a6d10e66f13e1821df96ec76849df034bb97c7758993a0c54cdaa3a38166"
 }
diff --git a/lib/py_nvd/cli/commands/preset.py b/lib/py_nvd/cli/commands/preset.py
@@ -160,7 +160,17 @@ def preset_register(
     dedup: bool | None = typer.Option(
         None,
         "--dedup/--no-dedup",
-        help="Deduplicate reads",
+        help="Deduplicate reads (umbrella: enables both --dedup-seq and --dedup-pos)",
+    ),
+    dedup_seq: bool | None = typer.Option(
+        None,
+        "--dedup-seq/--no-dedup-seq",
+        help="Sequence-based deduplication with clumpify",
+    ),
+    dedup_pos: bool | None = typer.Option(
+        None,
+        "--dedup-pos/--no-dedup-pos",
+        help="Positional deduplication with samtools markdup",
     ),
     trim_adapters: bool | None = typer.Option(
         None,
@@ -225,6 +235,8 @@ def preset_register(
         "entropy": entropy,
         "preprocess": preprocess,
         "dedup": dedup,
+        "dedup_seq": dedup_seq,
+        "dedup_pos": dedup_pos,
         "trim_adapters": trim_adapters,
         "scrub_host_reads": scrub_host_reads,
         "filter_reads": filter_reads,

diff --git a/lib/py_nvd/cli/commands/run.py b/lib/py_nvd/cli/commands/run.py
@@ -337,7 +337,19 @@ def run(
     dedup: bool | None = typer.Option(
         None,
         "--dedup/--no-dedup",
-        help="Deduplicate reads (default: follows --preprocess)",
+        help="Deduplicate reads (umbrella: enables both --dedup-seq and --dedup-pos)",
+        rich_help_panel=PANEL_PREPROCESSING,
+    ),
+    dedup_seq: bool | None = typer.Option(
+        None,
+        "--dedup-seq/--no-dedup-seq",
+        help="Sequence-based deduplication with clumpify (default: follows --dedup)",
+        rich_help_panel=PANEL_PREPROCESSING,
+    ),
+    dedup_pos: bool | None = typer.Option(
+        None,
+        "--dedup-pos/--no-dedup-pos",
+        help="Positional deduplication with samtools markdup (default: follows --dedup)",
         rich_help_panel=PANEL_PREPROCESSING,
     ),
     trim_adapters: bool | None = typer.Option(
@@ -625,6 +637,8 @@ def run(
         "preprocess": preprocess,
         "merge_pairs": merge_pairs,
         "dedup": dedup,
+        "dedup_seq": dedup_seq,
+        "dedup_pos": dedup_pos,
         "trim_adapters": trim_adapters,
         "scrub_host_reads": scrub_host_reads,
         "filter_reads": filter_reads,

diff --git a/lib/py_nvd/models.py b/lib/py_nvd/models.py
@@ -929,7 +929,17 @@ class NvdParams(BaseModel):
     )
     dedup: bool | None = Field(
         None,
-        description="Deduplicate reads",
+        description="Deduplicate reads (umbrella: enables both dedup_seq and dedup_pos)",
+        json_schema_extra={"category": "Preprocessing"},
+    )
+    dedup_seq: bool | None = Field(
+        None,
+        description="Sequence-based deduplication with clumpify (preprocessing)",
+        json_schema_extra={"category": "Preprocessing"},
+    )
+    dedup_pos: bool | None = Field(
+        None,
+        description="Positional deduplication with samtools markdup (after alignment)",
         json_schema_extra={"category": "Preprocessing"},
     )
     trim_adapters: bool | None = Field(

diff --git a/lib/py_nvd/params.py b/lib/py_nvd/params.py
@@ -22,7 +22,7 @@
 SCHEMA_FILENAME = "nvd-params.latest.schema.json"
 
 # GitHub raw URL for schema (fallback and for generated templates)
-SCHEMA_URL = "https://raw.githubusercontent.com/dhoconno/nvd/main/schemas/nvd-params.v2.4.0.schema.json"
+SCHEMA_URL = "https://raw.githubusercontent.com/dhoconno/nvd/main/schemas/nvd-params.v2.5.0.schema.json"
 
 
 def _find_schema_path() -> Path:
@@ -281,6 +281,8 @@ def _yaml_analysis_section(
         "preprocess",
         "merge_pairs",
         "dedup",
+        "dedup_seq",
+        "dedup_pos",
         "trim_adapters",
         "scrub_host_reads",
         "filter_reads",

diff --git a/modules/minimap2.nf b/modules/minimap2.nf
@@ -1,12 +1,13 @@
 /*
- * Map reads to contigs using minimap2, with optional duplicate marking.
+ * Map reads to contigs using minimap2, with optional positional duplicate marking.
  *
- * When params.dedup is true, the pipeline includes samtools collate/fixmate/markdup
- * to identify and remove PCR/optical duplicates. This is recommended for amplicon
- * or high-duplication libraries but adds computational overhead.
+ * When positional dedup is enabled (dedup_pos, falling back to dedup, then to
+ * preprocess), the pipeline includes samtools collate/fixmate/markdup to identify
+ * and remove PCR/optical duplicates. This is recommended for amplicon or
+ * high-duplication libraries but adds computational overhead.
  *
- * When params.dedup is false, reads are simply filtered (unmapped removed) and
- * coordinate-sorted, which is sufficient for many viral metagenomics applications.
+ * When positional dedup is disabled, reads are simply filtered (unmapped removed)
+ * and coordinate-sorted, which is sufficient for many viral metagenomics applications.
  *
  * Output is always a coordinate-sorted, indexed BAM file.
  */
@@ -30,7 +31,13 @@ process MAP_READS_TO_CONTIGS {
     def preset = platform == 'ont' || platform == 'sra'
         ? "map-ont"
         : "sr"
-    if (params.dedup) {
+    // Resolve the most specific setting that was explicitly provided. Explicit
+    // null checks are used instead of the Elvis operator because `?:` treats an
+    // explicit `false` as unset, which would let --no-dedup-pos fall through.
+    def should_dedup_pos = params.dedup_pos != null
+        ? params.dedup_pos
+        : (params.dedup != null ? params.dedup : params.preprocess)
+    if (should_dedup_pos) {
         """
         minimap2 -ax ${preset} -t ${task.cpus} ${contigs} ${reads} \\
         | samtools view -b -F 4 \\

diff --git a/nextflow.config b/nextflow.config
@@ -76,8 +76,12 @@ params {
     // whether to merge paired read mates based on overlaps between them
     merge_pairs               = null
 
-    // whether to deduplicate sequencing reads
+    // whether to deduplicate sequencing reads (umbrella: enables both dedup_seq and dedup_pos)
     dedup                     = null
+    // sequence-based deduplication with clumpify (preprocessing)
+    dedup_seq                 = null
+    // positional deduplication with samtools markdup (after alignment)
+    dedup_pos                 = null
 
     // Adapter trimming (Illumina only)
     trim_adapters             = null

diff --git a/schemas/README.md b/schemas/README.md
@@ -8,6 +8,7 @@ This directory contains JSON Schema definitions for the NVD pipeline.
 |--------|-------------|
 | `nvd-params.v2.3.0.schema.json` | Pipeline parameters schema (version 2.3.0) |
 | `nvd-params.v2.4.0.schema.json` | Pipeline parameters schema (version 2.4.0) |
+| `nvd-params.v2.5.0.schema.json` | Pipeline parameters schema (version 2.5.0) |
 | `nvd-params.latest.schema.json` | Symlink to the current version |
 
 ## Usage

diff --git a/schemas/nvd-params.latest.schema.json b/schemas/nvd-params.latest.schema.json
@@ -1 +1 @@
-nvd-params.v2.4.0.schema.json
+nvd-params.v2.5.0.schema.json
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		nvd-params.v2.4.0.schema.json
		nvd-params.v2.5.0.schema.json