Skip to content

Commit

Permalink
fix: sum_of_base_qualities should be zero when a read has no base qualities
Browse files Browse the repository at this point in the history
  • Loading branch information
msto committed Jan 13, 2025
1 parent b0b4227 commit 0261da9
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
10 changes: 9 additions & 1 deletion fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,16 +849,24 @@ def from_read(cls, read: pysam.AlignedSegment) -> List["SupplementaryAlignment"]
def sum_of_base_qualities(rec: AlignedSegment, min_quality_score: int = 15) -> int:
    """Calculate the sum of base qualities score for an alignment record.

    This function is useful for calculating the "mate score" as implemented in `samtools fixmate`.
    Consistently with `samtools fixmate`, this function returns 0 if the record has no base
    qualities.

    Args:
        rec: The alignment record to calculate the sum of base qualities from.
        min_quality_score: The minimum base quality score to use for summation. Base qualities
            below this value do not contribute to the sum.

    Returns:
        The sum of base qualities on the input record. 0 if the record has no base qualities.

    See:
        [`calc_sum_of_base_qualities()`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L227-L238)
        [`MD_MIN_QUALITY`](https://github.com/samtools/samtools/blob/4f3a7397a1f841020074c0048c503a01a52d5fa2/bam_mate.c#L42)
    """
    # Read the property once so the `None` guard and the summation see the same object.
    qualities = rec.query_qualities
    if qualities is None:
        return 0

    score: int = sum(qual for qual in qualities if qual >= min_quality_score)
    return score

Expand Down
2 changes: 1 addition & 1 deletion fgpyo/util/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ def attr_from(
# True, because python, so we need to check for that explicitly
if not set_value and attribute.type is not None and not attribute.type == bool:
try:
return_value = attribute.type(str_value) # type: ignore[operator]
return_value = attribute.type(str_value)
set_value = True
except (ValueError, TypeError):
pass
Expand Down
28 changes: 28 additions & 0 deletions tests/fgpyo/sam/test_sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import pysam
import pytest
from pysam import AlignedSegment
from pysam import AlignmentFile
from pysam import AlignmentHeader

import fgpyo.sam as sam
Expand Down Expand Up @@ -616,6 +618,32 @@ def test_sum_of_base_qualities_some_below_minimum() -> None:
assert sum_of_base_qualities(single, min_quality_score=4) == 9


def test_sum_of_base_qualities_unmapped(tmp_path: Path) -> None:
    """A record that is read back with no base qualities must have a quality sum of zero."""
    # NB: round-tripping through a temp file is necessary to obtain an `AlignedSegment` with no
    # base qualities, as `SamBuilder` does not currently permit the construction of such records.
    # TODO simplify this after `SamBuilder` is updated to support this.
    # https://github.com/fulcrumgenomics/fgpyo/issues/211
    builder = SamBuilder(r1_len=5, r2_len=5)
    template: AlignedSegment = builder.add_single()

    # NB: assigning to `query_qualities` does not affect the cached object returned by the
    # property, but it does change the underlying attribute used when constructing the string
    # representation, so the serialized line carries no qualities.
    template.query_qualities = None
    sam_line = template.to_string()

    bam_path: Path = tmp_path / "no_qualities.bam"
    with bam_path.open("w") as handle:
        # Header first, then the single record terminated by a newline.
        handle.writelines([str(builder.header), f"{sam_line}\n"])

    with AlignmentFile(str(bam_path)) as reader:
        reloaded = next(reader)

    assert reloaded.query_qualities is None
    assert sum_of_base_qualities(reloaded) == 0


def test_calc_edit_info_no_edits() -> None:
chrom = "ACGCTAGACTGCTAGCAGCATCTCATAGCACTTCGCGCTATAGCGATATAAATATCGCGATCTAGCG"
builder = SamBuilder(r1_len=30)
Expand Down

0 comments on commit 0261da9

Please sign in to comment.