Skip to content

Commit

Permalink
test: bolster align tests (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Jan 25, 2024
1 parent cd9573a commit 3a166a0
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 138 deletions.
56 changes: 56 additions & 0 deletions misc/bug_hunting/check_align.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging
import pickle

from cool_seq_tool.schemas import Strand

from dcd_mapping.align import align
from dcd_mapping.resources import LOCAL_STORE_PATH, get_scoreset_metadata

# Send findings to a log file so a long sweep leaves a reviewable record.
# ``force=True`` removes any handlers already attached to the root logger
# (e.g. by imported libraries) before applying this configuration.
logging.basicConfig(
    filename="dcd-check-align.log",
    format="%(asctime)s %(levelname)s:%(name)s:%(message)s",
    level=logging.WARNING,
    force=True,
)

_logger = logging.getLogger(__name__)

# Reference alignment results; looked up by scoreset URN further down.
# NOTE(security): ``pickle.load`` can execute arbitrary code while
# deserializing. Only run this script against a pickle file you produced
# yourself or otherwise trust.
with open("notebooks/analysis/results/mave_blat.pickle", "rb") as f:
    mave_blat_dict = pickle.load(f)

# One URN per line. Iterate the file directly instead of materializing it with
# ``readlines()``, and drop blank lines so a trailing newline does not become
# an empty URN that is then looked up and reported as a failure.
with open(LOCAL_STORE_PATH / "human_urns.txt") as f:
    urns = [urn for urn in map(str.strip, f) if urn]

# Translates the integer strand values used as keys here into ``Strand``
# members so they can be compared with the aligner's result.
# NOTE(review): presumably the reference pickle stores strand as 1 / -1 --
# confirm against the notebook that generated it.
strand_reformat = {1: Strand.POSITIVE, -1: Strand.NEGATIVE}


def format_chrom(chrom: str) -> str:
    """Return a chromosome name with the ``chr`` prefix prepended.

    The prefix is added unconditionally; a value that already starts with
    ``chr`` is not detected or normalized.

    :param chrom: chromosome identifier without a prefix, e.g. ``"17"``
    :return: the identifier prefixed with ``chr``, e.g. ``"chr17"``
    """
    return f"chr{chrom}"


# Re-run alignment for every URN and compare chromosome and strand against the
# stored reference results. This is a best-effort sweep: any failure for one
# URN is logged and the loop moves on to the next.
for urn in urns:
    print(f"Checking {urn}...")
    try:
        metadata = get_scoreset_metadata(urn)
        # NOTE(review): the two positional booleans are opaque from here --
        # confirm their meaning against the signature of
        # ``dcd_mapping.align.align``.
        alignment = align(metadata, False, True)
    except Exception as e:
        # ``exception`` logs at ERROR level with the same message as before
        # and also records the traceback, which is what locates the bug.
        _logger.exception("%s error: %s", urn, e)
        continue
    if urn not in mave_blat_dict:
        # No reference result to compare against.
        continue
    original = mave_blat_dict[urn]
    try:
        # Building this list can itself raise (missing "chrom"/"strand" key,
        # or a strand value absent from ``strand_reformat``), so it stays
        # inside the ``try``.
        comparisons = [
            ("chromosome", alignment.chrom, format_chrom(original["chrom"])),
            ("strand", alignment.strand, strand_reformat[original["strand"]]),
        ]
        for name, actual, expected in comparisons:
            if actual != expected:
                _logger.error(
                    "%s %s mismatch: %s (actual) vs %s (expected)",
                    urn,
                    name,
                    actual,
                    expected,
                )
    except Exception as e:
        _logger.exception("%s exception: %s", urn, e)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ branch = true

[tool.ruff]
src = ["src"]
exclude = ["misc/*"]
# pycodestyle (E, W)
# Pyflakes (F)
# flake8-annotations (ANN)
Expand Down
9 changes: 8 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
"""Provide shared testing utilities."""
"""Provide shared testing utilities.
Notes on test cases:
-------------------
* urn:mavedb:00000068-a-1: TP53, protein-coding, DNA.
"""
import json
import os
from pathlib import Path
Expand Down
32 changes: 32 additions & 0 deletions tests/fixtures/align_result.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,37 @@
{ "start": 1261199, "end": 12612062 },
{ "start": 12618514, "end": 12618568 }
]
},
"urn:mavedb:00000068-a-1": {
"chrom": "chr17",
"strand": -1,
"coverage": 99.83079526226734,
"ident_pct": 99.91525423728814,
"query_range": { "start": 0, "end": 1180 },
"query_subranges": [
{ "start": 1100, "end": 1180 },
{ "start": 993, "end": 1100 },
{ "start": 919, "end": 993 },
{ "start": 782, "end": 919 },
{ "start": 672, "end": 782 },
{ "start": 559, "end": 672 },
{ "start": 375, "end": 559 },
{ "start": 96, "end": 375 },
{ "start": 74, "end": 96 },
{ "start": 0, "end": 74 }
],
"hit_range": { "start": 7676520, "end": 7669690 },
"hit_subranges": [
{ "start": 7669610, "end": 7669690 },
{ "start": 7670608, "end": 7670715 },
{ "start": 7673534, "end": 7673608 },
{ "start": 7673700, "end": 7673837 },
{ "start": 7674180, "end": 7674290 },
{ "start": 7674858, "end": 7674971 },
{ "start": 7675052, "end": 7675236 },
{ "start": 7675993, "end": 7676272 },
{ "start": 7676381, "end": 7676403 },
{ "start": 7676520, "end": 7676594 }
]
}
}
9 changes: 9 additions & 0 deletions tests/fixtures/scoreset_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@
"target_sequence_type": "dna",
"target_reference_genome": "hg38",
"target_uniprot_ref": { "id": "uniprot:P04049", "offset": 51 }
},
{
"urn": "urn:mavedb:00000068-a-1",
"target_gene_name": "TP53 (P72R)",
"target_gene_category": "Protein coding",
"target_sequence": "ATGGAGGAGCCGCAGTCAGATCCTAGCGTCGAGCCCCCTCTGAGTCAGGAAACATTTTCAGACCTATGGAAACTACTTCCTGAAAACAACGTTCTGTCCCCCTTGCCGTCCCAAGCAATGGATGATTTGATGCTGTCCCCGGACGATATTGAACAATGGTTCACTGAAGACCCAGGTCCAGATGAAGCTCCCAGAATGCCAGAGGCTGCTCCCCGCGTGGCCCCTGCACCAGCAGCTCCTACACCGGCGGCCCCTGCACCAGCCCCCTCCTGGCCCCTGTCATCTTCTGTCCCTTCCCAGAAAACCTACCAGGGCAGCTACGGTTTCCGTCTGGGCTTCTTGCATTCTGGGACAGCCAAGTCTGTGACTTGCACGTACTCCCCTGCCCTCAACAAGATGTTTTGCCAACTGGCCAAGACCTGCCCTGTGCAGCTGTGGGTTGATTCCACACCCCCGCCCGGCACCCGCGTCCGCGCCATGGCCATCTACAAGCAGTCACAGCACATGACGGAGGTTGTGAGGCGCTGCCCCCACCATGAGCGCTGCTCAGATAGCGATGGTCTGGCCCCTCCTCAGCATCTTATCCGAGTGGAAGGAAATTTGCGTGTGGAGTATTTGGATGACAGAAACACTTTTCGACATAGTGTGGTGGTGCCCTATGAGCCGCCTGAGGTTGGCTCTGACTGTACCACCATCCACTACAACTACATGTGTAACAGTTCCTGCATGGGCGGCATGAACCGGAGGCCCATCCTCACCATCATCACACTGGAAGACTCCAGTGGTAATCTACTGGGACGGAACAGCTTTGAGGTGCGTGTTTGTGCCTGTCCTGGGAGAGACCGGCGCACAGAGGAAGAGAATCTCCGCAAGAAAGGGGAGCCTCACCACGAGCTGCCCCCAGGGAGCACTAAGCGAGCACTGCCCAACAACACCAGCTCCTCTCCCCAGCCAAAGAAGAAACCACTGGATGGAGAATATTTCACCCTTCAGATCCGTGGGCGTGAGCGCTTCGAGATGTTCCGAGAGCTGAATGAGGCCTTGGAACTCAAGGATGCCCAGGCTGGGAAGGAGCCAGGGGGGAGCAGGGCTCACTCCAGCCACCTGAAGTCCAAAAAGGGTCAGTCTACCTCCCGCCATAAAAAACTCATGTTCAAGACAGAAGGGCCTGACTCAGACTAG",
"target_sequence_type": "dna",
"target_reference_genome": "hg38",
"target_uniprot_ref": null
}
]
}
48 changes: 48 additions & 0 deletions tests/fixtures/urn:mavedb:00000068-a-1_scores.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
accession,hgvs_nt,hgvs_splice,hgvs_pro,score
urn:mavedb:00000068-a-1#528,NA,NA,p.Gly389Ter,-0.8448754497234633
urn:mavedb:00000068-a-1#546,NA,NA,p.Gly389Cys,-0.5081453414881882
urn:mavedb:00000068-a-1#612,NA,NA,p.His365Ter,-2.226787236679561
urn:mavedb:00000068-a-1#547,NA,NA,p.Gly389=,-0.3114032004197305
urn:mavedb:00000068-a-1#613,NA,NA,p.His365Tyr,-0.36585451364742
urn:mavedb:00000068-a-1#548,NA,NA,p.Gly389Ala,-0.8482077472283238
urn:mavedb:00000068-a-1#549,NA,NA,p.Glu388Ter,0.05530191813883238
urn:mavedb:00000068-a-1#550,NA,NA,p.Glu388Tyr,-0.34329774205619
urn:mavedb:00000068-a-1#551,NA,NA,p.Glu388Trp,-1.5238647488862949
urn:mavedb:00000068-a-1#552,NA,NA,p.Glu388Val,-0.6422615384883972
urn:mavedb:00000068-a-1#553,NA,NA,p.Glu388Thr,-0.8490660813565178
urn:mavedb:00000068-a-1#554,NA,NA,p.Glu388Ser,-1.2839487661086697
urn:mavedb:00000068-a-1#555,NA,NA,p.Glu388Arg,-1.024302690973422
urn:mavedb:00000068-a-1#556,NA,NA,p.Glu388Gln,-0.3114282471795208
urn:mavedb:00000068-a-1#557,NA,NA,p.Glu388Pro,-0.5571635356150665
urn:mavedb:00000068-a-1#558,NA,NA,p.Glu388Asn,-0.5151230575285044
urn:mavedb:00000068-a-1#559,NA,NA,p.Glu388Met,-1.2506258908030141
urn:mavedb:00000068-a-1#614,NA,NA,p.His365Trp,-1.1230151778981163
urn:mavedb:00000068-a-1#560,NA,NA,p.Glu388Leu,-0.6969258713954155
urn:mavedb:00000068-a-1#615,NA,NA,p.His365Val,-0.826647313158234
urn:mavedb:00000068-a-1#561,NA,NA,p.Glu388Lys,0.044449447270876415
urn:mavedb:00000068-a-1#562,NA,NA,p.Glu388Ile,-0.6703081212668602
urn:mavedb:00000068-a-1#616,NA,NA,p.His365Thr,-1.2339323429418732
urn:mavedb:00000068-a-1#563,NA,NA,p.Glu388His,-1.0228758006971406
urn:mavedb:00000068-a-1#617,NA,NA,p.His365Ser,-1.207958290024986
urn:mavedb:00000068-a-1#564,NA,NA,p.Glu388Gly,0.1928432362603335
urn:mavedb:00000068-a-1#565,NA,NA,p.Glu388Phe,-0.7995695025755253
urn:mavedb:00000068-a-1#566,NA,NA,p.Glu388Asp,-0.5734467636981624
urn:mavedb:00000068-a-1#567,NA,NA,p.Glu388Cys,-0.5200106027473981
urn:mavedb:00000068-a-1#618,NA,NA,p.His365Arg,-0.15648499129119786
urn:mavedb:00000068-a-1#568,NA,NA,p.Glu388=,0.13439466320551569
urn:mavedb:00000068-a-1#569,NA,NA,p.Glu388Ala,-0.24796460732041514
urn:mavedb:00000068-a-1#595,NA,NA,p.Ser366Tyr,-0.11899995564102445
urn:mavedb:00000068-a-1#596,NA,NA,p.Ser366Trp,-1.419503452209785
urn:mavedb:00000068-a-1#597,NA,NA,p.Ser366Val,-0.38831518205082655
urn:mavedb:00000068-a-1#598,NA,NA,p.Ser366Thr,-0.843833245301256
urn:mavedb:00000068-a-1#599,NA,NA,p.Ser366Arg,-0.6638684644018105
urn:mavedb:00000068-a-1#600,NA,NA,p.Ser366Gln,-0.4299226799331797
urn:mavedb:00000068-a-1#601,NA,NA,p.Ser366Pro,-0.5476098144406847
urn:mavedb:00000068-a-1#602,NA,NA,p.Ser366Lys,-1.1512867723916729
urn:mavedb:00000068-a-1#603,NA,NA,p.Ser366Ile,-0.6304135202423646
urn:mavedb:00000068-a-1#604,NA,NA,p.Ser366His,-1.1104041758877286
urn:mavedb:00000068-a-1#605,NA,NA,p.Ser366Gly,-0.5861940789803509
urn:mavedb:00000068-a-1#606,NA,NA,p.Ser366Phe,-0.1552908998376018
urn:mavedb:00000068-a-1#607,NA,NA,p.Ser366Glu,-0.8533090945531668
urn:mavedb:00000068-a-1#608,NA,NA,p.Ser366Asp,-1.3609765742847353
urn:mavedb:00000068-a-1#609,NA,NA,p.Ser366Cys,-1.6901296514759063
Loading

0 comments on commit 3a166a0

Please sign in to comment.