Skip to content

Commit

Permalink
chore: clean up unused code/docstring notes (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson authored Jan 22, 2024
1 parent bf7129b commit 0bc6969
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 26 deletions.
34 changes: 9 additions & 25 deletions src/dcd_mapping/lookup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
"""Handle API lookups to external (non-MaveDB) services.
This module should contain methods that we don't want to think about caching.
Data sources/handlers include:
* `CoolSeqTool <https://github.com/GenomicMedLab/cool-seq-tool/>`_
* `Gene Normalizer <https://github.com/cancervariants/gene-normalization>`_
* the `VRS-Python Translator tool <https://github.com/ga4gh/vrs-python>`_
* the UniProt web API
"""
import logging
from typing import Dict, List, Optional
from typing import List, Optional

import polars as pl
import requests
Expand Down Expand Up @@ -32,7 +37,6 @@
"get_ucsc_chromosome_name",
"get_chromosome_identifier_from_vrs_id",
"get_sequence",
"store_sequence",
"translate_hgvs_to_vrs",
"get_mane_transcripts",
"get_uniprot_sequence",
Expand Down Expand Up @@ -118,6 +122,8 @@ async def get_transcripts(
"""Get transcript accessions matching given parameters (excluding non-coding RNA).
TODO: may be able to successfully query with only one of gene symbol/chromosome ac.
In initial testing, gene symbol doesn't seem to be a meaningful filter, but should
get further confirmation.
:param gene_symbol: HGNC-given gene symbol (usually, but not always, equivalent to
symbols available in other nomenclatures.)
Expand Down Expand Up @@ -274,10 +280,6 @@ def get_gene_location(metadata: ScoresetMetadata) -> Optional[GeneLocation]:


# --------------------------------- SeqRepo --------------------------------- #
# TODO
# * some of these could be refactored into a single method
# * not clear if all of them are necessary
# * either way, they should all be renamed once we have a final idea of what's needed


def get_chromosome_identifier(chromosome: str) -> str:
Expand Down Expand Up @@ -362,24 +364,6 @@ def get_sequence(
return sequence


def store_sequence(sequence: str, names: List[Dict]) -> None:
    """Store a sequence in SeqRepo under the given aliases.

    Use with caution: this writes to state that lives outside of the mapper
    library itself. That is potentially dangerous, particularly if those changes
    are expected to endure (and if they aren't, it is unclear why outside state
    should be modified at all).

    Currently unused; retained only in case this functionality turns out to be
    truly necessary.

    :param sequence: raw sequence
    :param names: list of namespace/alias pairs,
        e.g. ``{"namespace": "GA4GH", "alias": "SQ.XXXXXX"}``
    """
    # Reach through the CoolSeqTool SeqRepo access wrapper to the underlying
    # SeqRepo handle (``.sr``), which is what exposes ``store``.
    CoolSeqToolBuilder().seqrepo_access.sr.store(sequence, nsaliases=names)


# -------------------------------- VRS-Python -------------------------------- #


Expand Down
3 changes: 2 additions & 1 deletion src/dcd_mapping/transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ def _get_protein_sequence(target_sequence: str) -> str:
"""Get protein sequence if necessary.
It'd be nice if there was a more elegant way to check if the sequence was already a
protein sequence.
protein sequence (it should be possible for protein sequences to contain <5 unique
bases, albeit unlikely with a large enough length).
:param target_sequence: sequence set as baseline in MAVE experiment (might already
be set to protein)
Expand Down

0 comments on commit 0bc6969

Please sign in to comment.