Handle CVX obsolete/replaced/placeholder entries (#171)

cthoyt · web-flow · commit 1b6450a713dd · 2024-02-21T10:53:16.000+01:00
* Skip CVX obsolete/placeholder entries

* Add replacement info
diff --git a/src/pyobo/sources/cvx.py b/src/pyobo/sources/cvx.py
@@ -7,7 +7,7 @@
 
 import pandas as pd
 
-from pyobo import Obo, Term
+from pyobo import Obo, Reference, Term
 
 __all__ = [
     "CVXGetter",
@@ -28,6 +28,11 @@ def iter_terms(self, force: bool = False) -> Iterable[Term]:
         return iter_terms()
 
 
+# This entry got split, and it's not obvious how to handle that, so it is marked obsolete manually
+MANUAL_OBSOLETE = {"15"}
+REPLACEMENTS = {"31": "85", "154": "86", "180": "13"}
+
+
 def iter_terms() -> Iterable[Term]:
     """Iterate over terms in CVX."""
     dd = defaultdict(set)
@@ -60,11 +65,22 @@ def iter_terms() -> Iterable[Term]:
         cvx_df[col] = cvx_df[col].map(lambda s: s.strip() if pd.notna(s) else s)
     terms = {}
     for cvx, short_name, full_name, notes, status, nonvaccine, _updated in cvx_df.values:
-        term = Term.from_triple(PREFIX, cvx, full_name)
+        if cvx == "99":
+            continue  # this is a placeholder
+
+        is_obsolete = cvx in MANUAL_OBSOLETE or (pd.notna(notes) and "do not use" in notes.lower())
+        term = Term(
+            reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
+            is_obsolete=is_obsolete,
+        )
         if short_name != full_name:
             term.append_synonym(short_name)
         if pd.notna(notes):
             term.append_comment(notes)
+        if is_obsolete:
+            replacement_identifier = REPLACEMENTS.get(cvx)
+            if replacement_identifier:
+                term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
         if pd.notna(status):
             term.append_property("status", status)
         if pd.notna(nonvaccine):
diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
@@ -53,6 +53,7 @@
     orthologous,
     part_of,
     see_also,
+    term_replaced_by,
 )
 from .utils import comma_separate, obo_escape_slim
 from ..constants import (
@@ -299,6 +300,11 @@ def append_comment(self, value: str) -> "Term":
         self.append_property(comment.curie, value)
         return self
 
+    def append_replaced_by(self, reference: ReferenceHint) -> "Term":
+        """Add a replaced by relationship."""
+        self.append_relationship(term_replaced_by, reference)
+        return self
+
     def append_parent(self, reference: ReferenceHint) -> "Term":
         """Add a parent to this entity."""
         reference = _ensure_ref(reference)