Skip to content

Commit

Permalink
test: add transcript selection checks
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 25, 2024
1 parent 01889fb commit c7f5826
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 68 deletions.
29 changes: 18 additions & 11 deletions misc/bug_hunting/check_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import asyncio
import pickle

from cool_seq_tool.schemas import Strand
from cool_seq_tool.schemas import Strand, TranscriptPriority

from dcd_mapping.align import align
from dcd_mapping.resources import get_scoreset_metadata, get_scoreset_records
Expand All @@ -11,7 +11,7 @@
logging.basicConfig(
filename="dcd-check-transcript.log",
format="%(asctime)s %(levelname)s:%(name)s:%(message)s",
level=logging.WARNING,
level=logging.DEBUG,
force=True,
)
_logger = logging.getLogger(__name__)
Expand All @@ -27,9 +27,6 @@

async def check_tx_results():
for urn in urns:
print(f"Checking {urn}...")
if urn != "urn:mavedb:00000083-i-1":
continue
try:
# prereqs
metadata = get_scoreset_metadata(urn)
Expand All @@ -41,27 +38,35 @@ async def check_tx_results():
try:
tx_result = await select_transcript(metadata, records, alignment)
except Exception as e:
_logger.error("%s error during transcript selection: %s", urn, e)
if urn in expected_mappings:
_logger.error("%s error during transcript selection: %s", urn, e)
else:
_logger.error("%s error during transcript selection (not in expected_mappings): %s", urn, e)
continue

breakpoint()
if urn not in mave_blat_dict:
continue # not in original experiment
if urn not in expected_mappings and tx_result is not None:
_logger.error("%s performed transcript selection when it shouldn't have", urn)
if urn not in expected_mappings:
if tx_result is not None:
_logger.error("%s performed transcript selection when it shouldn't have", urn)
else:
_logger.info("Skipping %s as expected", urn)
continue
if urn in expected_mappings and tx_result is None:
_logger.error("%s skipped transcript selection when it shouldn't have", urn)
continue

expected = expected_mappings[urn]
def reformat_mane(status: str):
return TranscriptPriority[status.replace(" ", "_").upper()]

try:
expected = expected_mappings[urn]
for (name, actual, expected) in [
("NP accession", tx_result.np, expected[0]),
("start position", tx_result.start, expected[1]),
("is full match", tx_result.is_full_match, expected[3]),
("NM accession", tx_result.nm, expected[4]),
("Tx priority", tx_result.transcript_mode, expected[5])
("Tx priority", tx_result.transcript_mode, reformat_mane(expected[5]))
]:
if actual != expected:
_logger.error(
Expand All @@ -74,4 +79,6 @@ async def check_tx_results():
except Exception as e:
_logger.error("%s exception: %s", urn, e)

_logger.info("%s completed successfully", urn) # not a warning but w/e

asyncio.run(check_tx_results())
5 changes: 3 additions & 2 deletions src/dcd_mapping/transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Outstanding questions/confusion
-------------------------------
* ``urn:mavedb:00000097-n-1``: unable to find any matching transcripts
* Lots of scoresets (2023-) giving index errors in offset calculation
* Lots of scoresets (esp. 2023-) giving index errors in offset calculation
* Remove MANE sorting once upstream sorting is confirmed
"""
import logging
Expand Down Expand Up @@ -274,7 +274,8 @@ def _offset_target_sequence(metadata: ScoresetMetadata, records: List[ScoreRow])
protein_sequence[i + p1 - p0] == amino_acids_by_position[p1],
protein_sequence[i + p2 - p0] == amino_acids_by_position[p2],
protein_sequence[i + p3 - p0] == amino_acids_by_position[p3],
protein_sequence[i + p4 - p0] == amino_acids_by_position[p4],
protein_sequence[i + p4 - p0]
== amino_acids_by_position[p4], # TODO problem here-ish
]
):
if i + 1 == min(amino_acids_by_position.keys()) or i + 2 == min(
Expand Down
44 changes: 34 additions & 10 deletions tests/fixtures/align_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,6 @@
{ "start": 37403170, "end": 37403325 }
]
},
"urn:mavedb:00000098-a-1": {
"chrom": "chr3",
"strand": -1,
"coverage": 100.0,
"ident_pct": 100.0,
"query_range": { "start": 0, "end": 12 },
"query_subranges": [{ "start": 0, "end": 12 }],
"hit_range": { "start": 38551475, "end": 38551511 },
"hit_subranges": [{ "start": 38551475, "end": 38551511 }]
},
"urn:mavedb:00000018-a-1": {
"chrom": "chr11",
"strand": 1,
Expand All @@ -43,6 +33,40 @@
"hit_range": { "start": 5227021, "end": 5227208 },
"hit_subranges": [{ "start": 5227021, "end": 5227208 }]
},
"urn:mavedb:00000001-a-4": {
"chrom": "chr16",
"strand": 1,
"coverage": 100.0,
"ident_pct": 100.0,
"query_range": { "start": 0, "end": 477 },
"query_subranges": [
{ "start": 0, "end": 66 },
{ "start": 66, "end": 150 },
{ "start": 150, "end": 223 },
{ "start": 223, "end": 333 },
{ "start": 333, "end": 413 },
{ "start": 413, "end": 477 }
],
"hit_range": { "start": 1314030, "end": 1324793 },
"hit_subranges": [
{ "start": 1314030, "end": 1314096 },
{ "start": 1314292, "end": 1314376 },
{ "start": 1315653, "end": 1315726 },
{ "start": 1320173, "end": 1320283 },
{ "start": 1320437, "end": 1320517 },
{ "start": 1324729, "end": 1324793 }
]
},
"urn:mavedb:00000098-a-1": {
"chrom": "chr3",
"strand": -1,
"coverage": 100.0,
"ident_pct": 100.0,
"query_range": { "start": 0, "end": 12 },
"query_subranges": [{ "start": 0, "end": 12 }],
"hit_range": { "start": 38551475, "end": 38551511 },
"hit_subranges": [{ "start": 38551475, "end": 38551511 }]
},
"urn:mavedb:00000061-h-1": {
"chrom": "chr3",
"strand": -1,
Expand Down
55 changes: 31 additions & 24 deletions tests/fixtures/transcript_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,35 @@
"transcript_mode": "mane_select",
"sequence": "TODO"
},
"urn:mavedb:00000001-a-4": {
"nm": "NM_003345.5",
"np": "NP_003336.1",
"start": 0,
"is_full_match": true,
"transcript_mode": "mane_select",
"sequence": "TODO"
},
"urn:mavedb:00000098-a-1": {
"nm": "NM_000335.5",
"np": "NP_000326.2",
"start": 1619,
"is_full_match": true,
"transcript_mode": "mane_select",
"sequence": "TODO"
},
"urn:mavedb:00000061-h-1": {
"nm": "NM_002880.4",
"np": "NP_002871.1",
"start": 51,
"is_full_match": true,
"transcript_mode": "mane_select",
"sequence": "TODO"
}
"urn:mavedb:00000001-a-4": {
"nm": "NM_003345.5",
"np": "NP_003336.1",
"start": 0,
"is_full_match": true,
"transcript_mode": "mane_select",
"sequence": "TODO"
},
"urn:mavedb:00000098-a-1": {
"nm": "NM_000335.5",
"np": "NP_000326.2",
"start": 1619,
"is_full_match": true,
"transcript_mode": "mane_select",
"sequence": "TODO"
},
"urn:mavedb:00000061-h-1": {
"nm": "NM_002880.4",
"np": "NP_002871.1",
"start": 51,
"is_full_match": true,
"transcript_mode": "mane_select",
"sequence": "TODO"
},
"urn:mavedb:00000068-a-1": {
"nm": "NM_000546.6",
"np": "NP_000537.3",
"start": 0,
"is_full_match": false,
"transcript_mode": "mane_select"
}
}
50 changes: 29 additions & 21 deletions tests/unit/test_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
from dcd_mapping.transcripts import select_transcript


def check_transcript_results_equality(actual: TxSelectResult, expected: TxSelectResult):
"""Check equality of transcript selection result vs fixture"""
assert actual.np == expected.np
assert actual.start == expected.start
assert actual.is_full_match is expected.is_full_match
assert actual.nm == expected.nm
assert actual.transcript_mode == expected.transcript_mode


@pytest.mark.asyncio(scope="module")
async def test_tx_src(
scoreset_metadata_fixture: Dict[str, ScoresetMetadata],
Expand All @@ -17,18 +26,12 @@ async def test_tx_src(
"""Test transcript selection for urn:mavedb:00000041-a-1"""
urn = "urn:mavedb:00000041-a-1"
metadata = scoreset_metadata_fixture[urn]
records = get_scoreset_records(urn) # TODO real fixture
records = get_scoreset_records(urn)
alignment_result = align_result_fixture[urn]
expected = transcript_results_fixture[urn]

actual = await select_transcript(metadata, records, alignment_result)

assert actual
assert actual.np == expected.np
assert actual.start == expected.start
assert actual.is_full_match is expected.is_full_match
assert actual.nm == expected.nm
assert actual.transcript_mode == expected.transcript_mode
check_transcript_results_equality(actual, expected)


@pytest.mark.asyncio(scope="module")
Expand All @@ -43,15 +46,9 @@ async def test_tx_scn5a(
records = get_scoreset_records(urn)
alignment_result = align_result_fixture[urn]
expected = transcript_results_fixture[urn]

actual = await select_transcript(metadata, records, alignment_result)

assert actual
assert actual.np == expected.np
assert actual.start == expected.start
assert actual.is_full_match is expected.is_full_match
assert actual.nm == expected.nm
assert actual.transcript_mode == expected.transcript_mode
check_transcript_results_equality(actual, expected)


@pytest.mark.asyncio(scope="module")
Expand All @@ -64,7 +61,6 @@ async def test_tx_hbb(
metadata = scoreset_metadata_fixture[urn]
records = get_scoreset_records(urn)
alignment_result = align_result_fixture[urn]

actual = await select_transcript(metadata, records, alignment_result)
assert actual is None

Expand All @@ -81,11 +77,23 @@ async def test_tx_raf(
records = get_scoreset_records(urn)
alignment_result = align_result_fixture[urn]
expected = transcript_results_fixture[urn]
actual = await select_transcript(metadata, records, alignment_result)
assert actual
check_transcript_results_equality(actual, expected)


@pytest.mark.asyncio(scope="module")
async def test_tx_tp53(
scoreset_metadata_fixture: Dict[str, ScoresetMetadata],
align_result_fixture: Dict[str, AlignmentResult],
transcript_results_fixture: Dict[str, TxSelectResult],
):
"""Test transcript selection for urn:mavedb:00000068-a-1"""
urn = "urn:mavedb:00000068-a-1"
metadata = scoreset_metadata_fixture[urn]
records = get_scoreset_records(urn)
alignment_result = align_result_fixture[urn]
expected = transcript_results_fixture[urn]
actual = await select_transcript(metadata, records, alignment_result)
assert actual
assert actual.np == expected.np
assert actual.start == expected.start
assert actual.is_full_match is expected.is_full_match
assert actual.nm == expected.nm
assert actual.transcript_mode == expected.transcript_mode
check_transcript_results_equality(actual, expected)

0 comments on commit c7f5826

Please sign in to comment.