chore: clean up unused code/docstring notes (#10)

GenomicMedLab · Jan 22, 2024 · 0bc6969 · 0bc6969
1 parent bf7129b
commit 0bc6969
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 26 deletions.
diff --git a/src/dcd_mapping/lookup.py b/src/dcd_mapping/lookup.py
@@ -1,9 +1,14 @@
 """Handle API lookups to external (non-MaveDB) services.
 
-This module should contain methods that we don't want to think about caching.
+Data sources/handlers include:
+
+* `CoolSeqTool <https://github.com/GenomicMedLab/cool-seq-tool/>`_
+* `Gene Normalizer <https://github.com/cancervariants/gene-normalization>`_
+* the `VRS-Python Translator tool <https://github.com/ga4gh/vrs-python>`_
+* the UniProt web API
 """
 import logging
-from typing import Dict, List, Optional
+from typing import List, Optional
 
 import polars as pl
 import requests
@@ -32,7 +37,6 @@
     "get_ucsc_chromosome_name",
     "get_chromosome_identifier_from_vrs_id",
     "get_sequence",
-    "store_sequence",
     "translate_hgvs_to_vrs",
     "get_mane_transcripts",
     "get_uniprot_sequence",
@@ -118,6 +122,8 @@ async def get_transcripts(
     """Get transcript accessions matching given parameters (excluding non-coding RNA).
 
     TODO: may be able to successfully query with only one of gene symbol/chromosome ac.
+    In initial testing, gene symbol doesn't seem to be a meaningful filter, but should
+    get further confirmation.
 
     :param gene_symbol: HGNC-given gene symbol (usually, but not always, equivalent to
         symbols available in other nomenclatures.)
@@ -274,10 +280,6 @@ def get_gene_location(metadata: ScoresetMetadata) -> Optional[GeneLocation]:
 
 
 # --------------------------------- SeqRepo --------------------------------- #
-# TODO
-# * some of these could be refactored into a single method
-# * not clear if all of them are necessary
-# * either way, they should all be renamed once we have a final idea of what's needed
 
 
 def get_chromosome_identifier(chromosome: str) -> str:
@@ -362,24 +364,6 @@ def get_sequence(
     return sequence
 
 
-def store_sequence(sequence: str, names: List[Dict]) -> None:
-    """Store sequence in SeqRepo.
-
-    I'm a little queasy about this part -- it seems potentially dangerous to be
-    modifying state outside of the mapper library itself, particularly if there
-    are any needs for those changes to endure (and if there aren't, why are we
-    modifying outside state in the first place?).
-
-    Currently unused unless we really really need this functionality.
-
-    :param sequence: raw sequence
-    :param names: list of namespace/alias pairs,
-        e.g. ``{"namespace": "GA4GH", "alias": "SQ.XXXXXX"}``
-    """
-    sr = CoolSeqToolBuilder().seqrepo_access
-    sr.sr.store(sequence, nsaliases=names)
-
-
 # -------------------------------- VRS-Python -------------------------------- #
 
 

diff --git a/src/dcd_mapping/transcripts.py b/src/dcd_mapping/transcripts.py
@@ -138,7 +138,8 @@ def _get_protein_sequence(target_sequence: str) -> str:
     """Get protein sequence if necessary.
 
     It'd be nice if there was a more elegant way to check if the sequence was already a
-    protein sequence.
+    protein sequence (a protein sequence could itself contain fewer than 5 unique
+    residues, although that is unlikely for a sufficiently long sequence).
 
     :param target_sequence: sequence set as baseline in MAVE experiment (might already
         be set to protein)