Skip to content

Commit

Permalink
Merge pull request #195 from rhpvorderman/missingquals
Browse files Browse the repository at this point in the history
Fix error where missing qualities in BAM file were not properly converted
  • Loading branch information
rhpvorderman authored Oct 4, 2024
2 parents 969b07a + 4d8de8a commit fb63df7
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Changelog
version 0.12.0-dev
------------------
+ Fix a bug where BAM files with missing quality sequences were improperly
handled.
+ Update internal UniVec database to version from November 21st 2023.

version 0.11.1
Expand Down
11 changes: 10 additions & 1 deletion src/sequali/_qcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,16 @@ BamParser__next__(BamParser *self)
fastq_buffer_cursor += seq_length;
memcpy(fastq_buffer_cursor, "\n+\n", 3);
fastq_buffer_cursor += 3;
decode_bam_qualities(fastq_buffer_cursor, bam_qual_start, seq_length);
if (seq_length && bam_qual_start[0] == 0xff) {
/* If qualities are missing, all bases are set to 0xff, which
is an invalid phred value. Create a quality string with only
zero Phreds for a valid FASTQ representation */
memset(fastq_buffer_cursor, 33, seq_length);
}
else {
decode_bam_qualities(fastq_buffer_cursor, bam_qual_start,
seq_length);
}
fastq_buffer_cursor += seq_length;
fastq_buffer_cursor[0] = '\n';
fastq_buffer_cursor += 1;
Expand Down
Binary file added tests/data/missing_quals.bam
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/data/missing_quals.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@HD VN:1.6 SO:unsorted
@RG ID:A SM:simple
Myheader 4 * 0 0 * * 0 0 GATTACA * RG:Z:A
10 changes: 10 additions & 0 deletions tests/test_bam_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,13 @@ def test_small_initial_buffer(initial_buffersize):
with xopen.xopen(SIMPLE_BAM, "rb") as fileobj:
parser = BamParser(fileobj, initial_buffersize=initial_buffersize)
assert len(list(parser)) == 3


def test_bam_parser_no_quals():
    """Check parsing of a BAM record that has no quality string.

    The single record in ``missing_quals.bam`` must keep its name and
    sequence, and its qualities must come back as one ``!`` per base.
    """
    bam_path = DATA / "missing_quals.bam"
    with xopen.xopen(bam_path, "rb") as bam_file:
        # Materialize all records while the file is still open.
        records = list(BamParser(bam_file))
    assert len(records) == 1
    record = records[0][0]
    assert record.name() == "Myheader"
    assert record.sequence() == "GATTACA"
    assert record.qualities() == "!!!!!!!"

0 comments on commit fb63df7

Please sign in to comment.