From bbefef8f1aaeac5d3488492ebeb16063d0922dcc Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Fri, 26 Dec 2025 11:05:00 +1100 Subject: [PATCH 1/6] Fix local-global alignment initialization to allow leading gaps Correctly initializes the previous score row with gap costs instead of -Infinity, permitting the global sequence (B) to start with gaps. Adds a regression test case. --- src/lib.rs | 5 +++-- tests/test_seq_smith.py | 29 +++++++++++++++++++++++++++++ uv.lock | 2 +- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 295a8f7..a1cc943 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -676,9 +676,10 @@ fn _local_global_align_core(params: AlignmentParams) -> PyResult { let mut max_col = 0; for row in 0..params.sb_len { - data.prev_score[row] = std::i32::MIN; + let score = params.gap_cost(row as i32 + 1); + data.prev_score[row] = score; data.hgap_pos[row] = -1; - data.hgap_score[row] = std::i32::MIN; + data.hgap_score[row] = score.saturating_add(params.gap_open); } for col in 0..params.sa_len { diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index 1e26e0d..927e70b 100644 --- a/tests/test_seq_smith.py +++ b/tests/test_seq_smith.py @@ -3,6 +3,7 @@ from conftest import AlignmentData from seq_smith import ( + FragmentType, encode, format_alignment_ascii, generate_cigar, @@ -454,3 +455,31 @@ def test_generate_cigar_with_deletion(common_data: AlignmentData) -> None: alignment = global_align(seqa, seqb, common_data.score_matrix, common_data.gap_open, common_data.gap_extend) cigar = generate_cigar(alignment) assert cigar == "1M1D" # AC vs A-, so A matches, then C is a deletion in seqB (query) + + +def test_local_global_align_overhangs() -> None: + # Case 1: Sequence B (Global) has overhanging tails + # A: CCCC + # B: AAACCCCAAA + # Expected: A aligns to central C's, B has leading/trailing gaps. 
+ seqa = encode("CCCC", "ACGT") + seqb = encode("AAACCCCAAA", "ACGT") + sm = make_score_matrix("ACGT", match_score=2, mismatch_score=-2) + aln = local_global_align(seqa, seqb, sm, gap_open=-3, gap_extend=-1) + + # Check the fragments + # Expected: + # 1. GapA len 3 (AAA) + # 2. Match len 4 (CCCC) + # 3. GapA len 3 (AAA) + + frags = aln.fragments + assert len(frags) == 3 + assert frags[0].fragment_type == FragmentType.AGap + assert frags[0].len == 3 + assert frags[1].fragment_type == FragmentType.Match + assert frags[1].len == 4 + assert frags[2].fragment_type == FragmentType.AGap + assert frags[2].len == 3 + + assert aln.score == -2 diff --git a/uv.lock b/uv.lock index 1cc9c84..0828b28 100644 --- a/uv.lock +++ b/uv.lock @@ -1019,7 +1019,7 @@ wheels = [ [[package]] name = "seq-smith" -version = "0.3.0" +version = "0.4.0" source = { editable = "." } dependencies = [ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, From 180b717c56b268e34ac0f02983bf8ac2c49be1c5 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Fri, 26 Dec 2025 11:18:28 +1100 Subject: [PATCH 2/6] rewrite comments so they don't look like code to ruff --- tests/test_seq_smith.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index 927e70b..5ccb817 100644 --- a/tests/test_seq_smith.py +++ b/tests/test_seq_smith.py @@ -459,8 +459,8 @@ def test_generate_cigar_with_deletion(common_data: AlignmentData) -> None: def test_local_global_align_overhangs() -> None: # Case 1: Sequence B (Global) has overhanging tails - # A: CCCC - # B: AAACCCCAAA + # sequence A: CCCC + # sequence B: AAACCCCAAA # Expected: A aligns to central C's, B has leading/trailing gaps. 
seqa = encode("CCCC", "ACGT") seqb = encode("AAACCCCAAA", "ACGT") From ca93141030ae0caab857ff44993f8603540d9655 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Fri, 26 Dec 2025 11:19:13 +1100 Subject: [PATCH 3/6] rewrite comments --- tests/test_seq_smith.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index 5ccb817..42ad4e6 100644 --- a/tests/test_seq_smith.py +++ b/tests/test_seq_smith.py @@ -458,7 +458,7 @@ def test_generate_cigar_with_deletion(common_data: AlignmentData) -> None: def test_local_global_align_overhangs() -> None: - # Case 1: Sequence B (Global) has overhanging tails + # Sequence B (Global) has overhanging tails # sequence A: CCCC # sequence B: AAACCCCAAA # Expected: A aligns to central C's, B has leading/trailing gaps. From 1debd134c6fc15bfcc63dd9b714a9bfcaa0cb260 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Sat, 27 Dec 2025 17:03:07 +1100 Subject: [PATCH 4/6] Update tests/test_seq_smith.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- tests/test_seq_smith.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index 42ad4e6..944c650 100644 --- a/tests/test_seq_smith.py +++ b/tests/test_seq_smith.py @@ -3,6 +3,7 @@ from conftest import AlignmentData from seq_smith import ( + AlignmentFragment, FragmentType, encode, format_alignment_ascii, From 13e8fb4f6a711bcc180494ce28f2d47e4dc69866 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Sat, 27 Dec 2025 17:03:14 +1100 Subject: [PATCH 5/6] Update tests/test_seq_smith.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- tests/test_seq_smith.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index 944c650..c5fe278 100644 --- a/tests/test_seq_smith.py +++ 
b/tests/test_seq_smith.py @@ -468,19 +468,11 @@ def test_local_global_align_overhangs() -> None: sm = make_score_matrix("ACGT", match_score=2, mismatch_score=-2) aln = local_global_align(seqa, seqb, sm, gap_open=-3, gap_extend=-1) - # Check the fragments - # Expected: - # 1. GapA len 3 (AAA) - # 2. Match len 4 (CCCC) - # 3. GapA len 3 (AAA) - - frags = aln.fragments - assert len(frags) == 3 - assert frags[0].fragment_type == FragmentType.AGap - assert frags[0].len == 3 - assert frags[1].fragment_type == FragmentType.Match - assert frags[1].len == 4 - assert frags[2].fragment_type == FragmentType.AGap - assert frags[2].len == 3 + expected_fragments = [ + AlignmentFragment(fragment_type=FragmentType.AGap, sa_start=0, sb_start=0, len=3), + AlignmentFragment(fragment_type=FragmentType.Match, sa_start=0, sb_start=3, len=4), + AlignmentFragment(fragment_type=FragmentType.AGap, sa_start=4, sb_start=7, len=3), + ] + assert aln.fragments == expected_fragments assert aln.score == -2 From 2770927f2c1aff555360b9778182ebd4f8242334 Mon Sep 17 00:00:00 2001 From: Tobias Sargeant Date: Sat, 27 Dec 2025 17:36:53 +1100 Subject: [PATCH 6/6] manually add __init__ to pyi --- seq_smith/_seq_smith.pyi | 7 +++++++ tests/test_seq_smith.py | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/seq_smith/_seq_smith.pyi b/seq_smith/_seq_smith.pyi index 495b726..2e78a35 100644 --- a/seq_smith/_seq_smith.pyi +++ b/seq_smith/_seq_smith.pyi @@ -18,6 +18,13 @@ class Alignment: @final class AlignmentFragment: + def __init__( + self, + fragment_type: FragmentType, + sa_start: int, + sb_start: int, + length: int, + ) -> None: ... @property def fragment_type(self) -> FragmentType: ... 
@fragment_type.setter diff --git a/tests/test_seq_smith.py b/tests/test_seq_smith.py index c5fe278..b693855 100644 --- a/tests/test_seq_smith.py +++ b/tests/test_seq_smith.py @@ -469,9 +469,9 @@ def test_local_global_align_overhangs() -> None: aln = local_global_align(seqa, seqb, sm, gap_open=-3, gap_extend=-1) expected_fragments = [ - AlignmentFragment(fragment_type=FragmentType.AGap, sa_start=0, sb_start=0, len=3), - AlignmentFragment(fragment_type=FragmentType.Match, sa_start=0, sb_start=3, len=4), - AlignmentFragment(fragment_type=FragmentType.AGap, sa_start=4, sb_start=7, len=3), + AlignmentFragment(FragmentType.AGap, 0, 0, 3), + AlignmentFragment(FragmentType.Match, 0, 3, 4), + AlignmentFragment(FragmentType.AGap, 4, 7, 3), ] assert aln.fragments == expected_fragments