Skip to content

Commit

Permalink
added uniprot accession pattern query
Browse files Browse the repository at this point in the history
  • Loading branch information
jal347 committed Oct 3, 2024
1 parent 66b2efe commit 44a6a87
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 14 deletions.
34 changes: 21 additions & 13 deletions src/hub/dataload/sources/chembl/parser.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
import json
import re


# UniProtKB accession number format (6 or 10 characters). Compiled once at
# import time instead of on every parse_data() call.
UNIPROT_ACCESSION_PATTERN = re.compile(
    r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
)


def parse_data(data):
    """Yield one document per ChEMBL target that has UniProt-style accessions.

    Each entry of ``data["targets"]`` is inspected; the ``accession`` value of
    every element in its ``target_components`` list is right-stripped and kept
    only if the whole string matches ``UNIPROT_ACCESSION_PATTERN``. Targets
    with no matching accession produce no output.

    Args:
        data: Mapping with a ``"targets"`` key holding an iterable of target
            dicts. Each target must have ``"target_chembl_id"``;
            ``"target_components"`` may be missing, ``None`` or empty, and
            individual components may lack ``"accession"`` or carry a
            falsy value for it.

    Yields:
        dict: ``{"chembl_target": <target_chembl_id>,
        "xrefs": {"accession": [<accession>, ...]}}`` with accessions in
        their original component order.

    Raises:
        KeyError: If ``data`` has no ``"targets"`` key, or a target that has
            at least one matching accession lacks ``"target_chembl_id"``.
    """
    for item in data["targets"]:
        uniprot_accessions = []
        # "target_components" can be absent or null in the input; treat both
        # as "no components" rather than failing on iteration.
        for component in item.get("target_components") or ():
            # .get() tolerates components without an "accession" key.
            accession = component.get("accession")
            if not accession:
                continue
            # Trailing whitespace would otherwise defeat fullmatch().
            accession = accession.rstrip()
            if UNIPROT_ACCESSION_PATTERN.fullmatch(accession):
                uniprot_accessions.append(accession)
        if uniprot_accessions:
            yield {
                "chembl_target": item["target_chembl_id"],
                "xrefs": {
                    "accession": uniprot_accessions,
                },
            }


Expand Down
2 changes: 1 addition & 1 deletion src/hub/dataload/sources/chembl/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class ChemblUploader(uploader.BaseSourceUploader):

keylookup = MyGeneKeyLookup(
[
("uniprot", "chembl.xrefs.accession"),
("swissprot", "chembl.xrefs.accession"),
],
skip_on_failure=True,
)
Expand Down

0 comments on commit 44a6a87

Please sign in to comment.