diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be280f5..c139ee3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,27 +1,53 @@ name: Run tests on: - push: - branches: [ development ] - pull_request: - branches: [ master, development ] + push: + branches: [development] + pull_request: + branches: [master, development] jobs: - run-tests: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - toolchain: [stable] - - steps: - - uses: actions/checkout@v4 - - - name: Install ${{ matrix.toolchain }} - uses: dtolnay/rust-toolchain@master - with: - toolchain: ${{ matrix.toolchain }} - - - name: Run tests - run: cargo test --release + run-tests: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + toolchain: [stable] + + steps: + - uses: actions/checkout@v4 + + - name: Install ${{ matrix.toolchain }} + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.toolchain }} + + - name: Run tests + run: cargo test --release + + run-all-features: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install stable + uses: dtolnay/rust-toolchain@master + with: + toolchain: "stable" + + - name: Run tests + run: cargo test --features serde,pyo3 --release + + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install stable + uses: dtolnay/rust-toolchain@master + with: + toolchain: "stable" + + - name: Check formatting of Rust code with rustfmt + uses: actions-rust-lang/rustfmt@v1.1.1 diff --git a/Cargo.toml b/Cargo.toml index ae6e778..1880d08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,12 @@ [package] name = "ontolius" -version = "0.7.1" +version = "0.7.2" description = "A fast and safe crate for working with biomedical ontologies." 
keywords = ["ontology", "bioinformatics", "HPO", "MAxO", "GO"] edition = "2021" -homepage = "https://github.com/ielis/ontolius" -repository = "https://github.com/ielis/ontolius" +homepage = "https://github.com/P2GX/ontolius" +repository = "https://github.com/P2GX/ontolius" readme = "README.md" license-file = "LICENSE" @@ -23,16 +23,20 @@ pyo3 = { version = "0.24.1", optional = true, features = ["abi3-py310"] } # The dependency restriction can be removed after the error is fixed. # https://github.com/neo4j-labs/graph/issues/138 rayon = { version = "=1.10.0", optional = true } +serde = { version = "1.0.228", optional = true } [dev-dependencies] flate2 = "1.0.30" criterion = "0.5.1" +serde = "1.0.228" +serde_test = "1.0.177" [features] default = ["obographs", "csr"] csr = ["dep:graph_builder", "dep:rayon"] obographs = ["dep:obographs-dev", "dep:curieosa"] pyo3 = ["dep:pyo3"] +serde = ["dep:serde"] [[bench]] name = "hierarchy_io" diff --git a/README.md b/README.md index cd63c59..2209b5d 100644 --- a/README.md +++ b/README.md @@ -163,9 +163,10 @@ At this time, support for the following ontologies is tested: * Human Phenotype Ontology (HPO) * Gene Ontology (GO) * Medical Action Ontology (MAxO) +* Units of Measurement Ontology (UO) Other ontologies are very likely to work too. -In case of any problems, please let us know on our [Issue tracker](https://github.com/ielis/ontolius/issues). +In case of any problems, please let us know on our [Issue tracker](https://github.com/P2GX/ontolius/issues). 
## Features @@ -178,6 +179,7 @@ by default: * `obographs` `(*)` - support loading Ontology from Obographs JSON file * `pyo3` - include [`crate::py`] module with PyO3 bindings to selected data structs to support using from Python +* `serde` - to provide (de)serialization functions to map [`crate::TermId`] to/from a curie (see `tests/test_serde.rs` for an example) ## Run tests diff --git a/resources/uo/uo.json.gz b/resources/uo/uo.json.gz new file mode 100644 index 0000000..72c3233 Binary files /dev/null and b/resources/uo/uo.json.gz differ diff --git a/src/common.rs b/src/common.rs index 090359a..267f1dd 100644 --- a/src/common.rs +++ b/src/common.rs @@ -41,6 +41,16 @@ pub mod maxo { pub static MEDICAL_ACTION: TermId = make_term_id(KnownPrefix::MAXO, 1, 7); } +/// Constants for working with Unit of Measurement Ontology (UO). +pub mod uo { + use crate::{term_id::KnownPrefix, TermId}; + + use super::make_term_id; + /// [unit (UO:0000000)](http://purl.obolibrary.org/obo/UO_0000000) + /// is the root of all terms in the UO. + pub static UNIT: TermId = make_term_id(KnownPrefix::UO, 0, 7); +} + /// Constants for working with Gene Ontology (GO). pub mod go { use crate::{term_id::KnownPrefix, TermId}; diff --git a/src/term_id.rs b/src/term_id.rs index 0fe76d8..48b9b12 100644 --- a/src/term_id.rs +++ b/src/term_id.rs @@ -232,6 +232,56 @@ impl Display for TermId { } } +/// Support writing out a [`TermId`] as a curie +/// when working with `serde`. +#[cfg(feature = "serde")] +mod serde { + + use std::str::FromStr; + + use super::TermId; + + impl TermId { + pub fn serialize_as_curie(term_id: &TermId, serializer: S) -> Result + where + S: serde::Serializer, + { + // Sadly, we must allocate a string to serialize a curie. 
+ let curie = term_id.to_string(); + serializer.serialize_str(&curie) + } + + pub fn deserialize_from_curie<'de, D>(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct TermIdCurieVisitor; + + impl<'de> serde::de::Visitor<'de> for TermIdCurieVisitor { + type Value = TermId; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "{}", "a curie (e.g. \"HP:0001250\")") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + match TermId::from_str(v) { + Ok(term_id) => Ok(term_id), + Err(_e) => Err(serde::de::Error::invalid_value( + serde::de::Unexpected::Str(v), + &self, + )), + } + } + } + deserializer.deserialize_str(TermIdCurieVisitor) + } + } +} + /// The representation of the prefix of a [`TermId`]. /// /// ### Examples @@ -290,6 +340,15 @@ impl PartialEq for Prefix<'_> { } } +/// Prefix can be tested for equality with a `&str`. +/// +/// ``` +/// use ontolius::TermId; +/// +/// let term_id: TermId = "HP:0001250".parse().unwrap(); +/// +/// assert!(&term_id.prefix() == "HP"); +/// ``` impl PartialEq for Prefix<'_> { fn eq(&self, other: &str) -> bool { match &self.0 .0 { @@ -408,6 +467,7 @@ pub(crate) enum KnownPrefix { CHEBI, NCIT, PMID, + UO, } impl PartialEq for KnownPrefix { @@ -424,6 +484,7 @@ impl PartialEq for KnownPrefix { KnownPrefix::CHEBI => other == "CHEBI", KnownPrefix::NCIT => other == "NCIT", KnownPrefix::PMID => other == "PMID", + KnownPrefix::UO => other == "UO", } } } @@ -442,6 +503,7 @@ impl Display for KnownPrefix { KnownPrefix::CHEBI => f.write_str("CHEBI"), KnownPrefix::NCIT => f.write_str("NCIT"), KnownPrefix::PMID => f.write_str("PMID"), + KnownPrefix::UO => f.write_str("UO"), } } } @@ -474,6 +536,8 @@ impl TryFrom<&str> for KnownPrefix { Ok(KnownPrefix::NCIT) } else if value.starts_with("PMID") { Ok(KnownPrefix::PMID) + } else if value.starts_with("UO") { + Ok(KnownPrefix::UO) } else { Err(()) } diff --git a/tests/test_common.rs 
b/tests/test_common.rs index 05b83dc..c5407c3 100644 --- a/tests/test_common.rs +++ b/tests/test_common.rs @@ -1,4 +1,4 @@ -use ontolius::common::{go, hpo, maxo}; +use ontolius::common::{go, hpo, maxo, uo}; #[test] fn hpo_commons_are_accessible() { @@ -18,3 +18,8 @@ fn go_commons_are_accessible() { fn maxo_commons_are_accessible() { assert_eq!(maxo::MEDICAL_ACTION, ("MAXO", "0000001")) } + +#[test] +fn uo_commons_are_accessible() { + assert_eq!(uo::UNIT, ("UO", "0000000")) +} diff --git a/tests/test_io.rs b/tests/test_io.rs index 29d4eb8..2d311af 100644 --- a/tests/test_io.rs +++ b/tests/test_io.rs @@ -364,3 +364,88 @@ mod medical_action_ontology { _ = maxo.version(); } } + +/// Unit of Measurement Ontology (UO) tests. +mod unit_measurement_ontology { + + use std::fs::File; + use std::io::BufReader; + use std::sync::OnceLock; + + use flate2::bufread::GzDecoder; + use ontolius::common::uo::UNIT; + use ontolius::io::OntologyLoaderBuilder; + use ontolius::ontology::csr::MinimalCsrOntology; + use ontolius::ontology::{HierarchyWalks, MetadataAware, OntologyTerms}; + use ontolius::term::MinimalTerm; + use ontolius::TermId; + + const UO_PATH: &str = "resources/uo/uo.json.gz"; + + fn uo() -> &'static MinimalCsrOntology { + static ONTOLOGY: OnceLock = OnceLock::new(); + ONTOLOGY.get_or_init(|| { + let reader = GzDecoder::new(BufReader::new( + File::open(UO_PATH).expect("Obographs JSON file should exist"), + )); + let loader = OntologyLoaderBuilder::new().obographs_parser().build(); + loader + .load_from_read(reader) + .expect("Obographs JSON should be well formatted") + }) + } + + macro_rules! 
test_ancestors { + ($($ontology: expr, $curie: expr, $expected: expr)*) => { + $( + let query: TermId = $curie.parse().unwrap(); + + let mut names: Vec<_> = $ontology + .iter_ancestor_ids(&query) + .map(|tid| $ontology.term_by_id(tid).map(MinimalTerm::name).unwrap()) + .collect(); + names.sort(); + assert_eq!( + names, + $expected, + ); + )* + }; + } + + #[test] + fn iter_ancestor_ids() { + let uo = uo(); + + test_ancestors!( + uo, + "UO:0010002", // millisiemens + &[ + "conduction unit", + "electrical conduction unit", + "siemens based unit", + "unit" + ] + ); + test_ancestors!( + uo, + "UO:0000010", // second + &["base unit", "second based unit", "time unit", "unit"] + ); + } + + #[test] + fn we_get_expected_descendant_counts_for_uo_root() { + let uo = uo(); + + let descendant_count = uo.iter_descendant_ids(&UNIT).count(); + assert_eq!(descendant_count, 549); + } + + #[test] + fn version_parsing() { + let uo = uo(); + + assert_eq!(uo.version(), "2026-01-09"); + } +} diff --git a/tests/test_serde.rs b/tests/test_serde.rs new file mode 100644 index 0000000..9c92e42 --- /dev/null +++ b/tests/test_serde.rs @@ -0,0 +1,72 @@ +/// An example of serializing a TermId as a CURIE. +/// +/// When working with types that use [`ontolius::TermId`]s and their serialization, +/// we would like to derive both [`serde::Serialize`] and [`serde::Deserialize`] traits +/// to enable interoperability with the `serde` crate. +/// However, we cannot do this directly since [`ontolius::TermId`] does not implement the traits. +/// +/// As a workaround, `serde` allows using custom serialization and deserialization functions +/// and `ontolius` provides functions to use with (de)serialization. +/// +/// The functions are available on [`ontolius::TermId`] when the `serde` feature is enabled. +/// An example usage is shown in this module. 
+#[cfg(feature = "serde")] +mod test_serde { + + use serde; + use serde_test::{assert_de_tokens_error, assert_tokens, Token}; + + use ontolius::TermId; + + /// An example struct that we want to serialize and deserialize with serde. + /// + /// Use the `serde` attributes on the `term_id` field + /// to serialize the `TermId` as CURIE. + #[derive(PartialEq, Debug, serde::Serialize, serde::Deserialize)] + struct Feature { + #[serde( + serialize_with = "TermId::serialize_as_curie", + deserialize_with = "TermId::deserialize_from_curie" + )] + term_id: TermId, + } + + /// Test that serializing `Feature` produces the expected Serde tokens and that + /// a valid `Feature` can be created by deserializing those tokens. + #[test] + fn test_serialize() { + let feature = Feature { + term_id: TermId::from(("HP", "0001250")), + }; + + assert_tokens( + &feature, + &[ + Token::Struct { + name: "Feature", + len: 1, + }, + Token::Str("term_id"), + Token::Str("HP:0001250"), + Token::StructEnd, + ], + ) + } + + #[test] + fn test_malformed_curie_produces_an_error() { + let tokens = [ + Token::Struct { + name: "Feature", + len: 1, + }, + Token::Str("term_id"), + Token::Str("INVALID"), + Token::StructEnd, + ]; + assert_de_tokens_error::( + &tokens, + "invalid value: string \"INVALID\", expected a curie (e.g. \"HP:0001250\")", + ); + } +}