Skip to content

Commit 58ab2b8

Browse files
authored
Update synonym parsing to handle OMO CURIEs (#166)
* Update synonym parsing
* Handle preferred curies
* Enable searching for synonym type even without specificity
1 parent dc9ad7d commit 58ab2b8

File tree

2 files changed

+35
-14
lines changed

2 files changed

+35
-14
lines changed

src/pyobo/reader.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -444,20 +444,23 @@ def _extract_synonym(
444444
break
445445

446446
stype: Optional[SynonymTypeDef] = None
447-
if specificity is not None: # go fishing for a synonym type definition
448-
for _stype in synonym_typedefs.values():
449-
# Since there aren't a lot of carefully defined synonym definitions, it
450-
# can appear as a string or curie. Therefore, we might see temporary prefixes
451-
# get added, so we should check against full curies as well as local unique
452-
# identifiers
453-
if rest.startswith(_stype.curie):
454-
rest = rest[len(_stype.curie) :].strip()
455-
stype = _stype
456-
break
457-
elif rest.startswith(_stype.identifier):
458-
rest = rest[len(_stype.identifier) :].strip()
459-
stype = _stype
460-
break
447+
for _stype in synonym_typedefs.values():
448+
# Since there aren't a lot of carefully defined synonym definitions, it
449+
# can appear as a string or curie. Therefore, we might see temporary prefixes
450+
# get added, so we should check against full curies as well as local unique
451+
# identifiers
452+
if rest.startswith(_stype.curie):
453+
rest = rest[len(_stype.curie) :].strip()
454+
stype = _stype
455+
break
456+
elif rest.startswith(_stype.preferred_curie):
457+
rest = rest[len(_stype.preferred_curie) :].strip()
458+
stype = _stype
459+
break
460+
elif rest.startswith(_stype.identifier):
461+
rest = rest[len(_stype.identifier) :].strip()
462+
stype = _stype
463+
break
461464

462465
if not rest.startswith("[") or not rest.endswith("]"):
463466
logger.warning("[%s:%s] problem with synonym: %s", prefix, identifier, s)

tests/test_get.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
iterate_node_synonyms,
2020
iterate_node_xrefs,
2121
)
22+
from pyobo.struct.struct import acronym
2223
from tests.constants import TEST_CHEBI_OBO_PATH, chebi_patch
2324

2425

@@ -105,6 +106,7 @@ def test_extract_synonym(self):
105106
iupac_name = SynonymTypeDef.from_text("IUPAC NAME", lower=False)
106107
synoynym_typedefs = {
107108
"IUPAC_NAME": iupac_name,
109+
acronym.curie: acronym,
108110
}
109111

110112
for expected_synonym, text in [
@@ -137,6 +139,22 @@ def test_extract_synonym(self):
137139
Synonym(name="LTEC I", specificity="EXACT"),
138140
'"LTEC I" []',
139141
),
142+
(
143+
Synonym(name="HAdV-A", specificity="BROAD", type=acronym),
144+
'"HAdV-A" BROAD OMO:0003012 []',
145+
),
146+
(
147+
Synonym(name="HAdV-A", specificity="BROAD", type=acronym),
148+
'"HAdV-A" BROAD omo:0003012 []',
149+
),
150+
(
151+
Synonym(name="HAdV-A", specificity="EXACT", type=acronym),
152+
'"HAdV-A" OMO:0003012 []',
153+
),
154+
(
155+
Synonym(name="HAdV-A", specificity="EXACT", type=acronym),
156+
'"HAdV-A" omo:0003012 []',
157+
),
140158
]:
141159
with self.subTest(s=text):
142160
actual_synonym = _extract_synonym(

0 commit comments

Comments
 (0)