Skip to content

Commit

Permalink
add measurementUnit submodel
Browse files Browse the repository at this point in the history
  • Loading branch information
MBueschelberger committed Dec 10, 2024
1 parent 6d6bf16 commit d62607f
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 12 deletions.
7 changes: 7 additions & 0 deletions data2rdf/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class Config(BaseSettings):
description="URI to QUDT quantity kind ontology for unit conversion",
)

language: str = Field("en", description="Language for the unit labels")

base_iri: Union[str, AnyUrl] = Field(
"https://www.example.org", description="Base IRI for individuals."
)
Expand Down Expand Up @@ -77,6 +79,11 @@ class Config(BaseSettings):
description="In TBox mode, exclude the title of the ontology in the graph.",
)

dsms_schema_default: bool = Field(
True,
description="""Default value for the `dsms_schema` parameter of the `to_dict` method.""",
)

model_config = ConfigDict(extra="ignore")

@model_validator(mode="after")
Expand Down
55 changes: 53 additions & 2 deletions data2rdf/models/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import warnings
from typing import Any, Dict, List, Optional, Union

from data2rdf.qudt.utils import _get_query_match
from data2rdf.utils import make_prefix
from data2rdf.qudt.utils import _get_qudt_label_and_symbol, _get_query_match
from data2rdf.utils import make_prefix, split_namespace
from data2rdf.warnings import ParserWarning

from data2rdf.models.utils import ( # isort:skip
Expand All @@ -18,6 +18,7 @@
BasicGraphModel,
BasicSuffixModel,
RelationType,
BaseConfigModel,
)

from pydantic import ( # isort:skip
Expand Down Expand Up @@ -109,6 +110,39 @@ def json_ld(self) -> "Dict[str, Any]":
}


class MeasurementUnit(BaseConfigModel):
    """Measurement unit identified by an ontological IRI.

    Only ``iri`` is required. After validation, a missing ``label`` or
    ``symbol`` is filled from the result of ``_get_qudt_label_and_symbol``
    (queried with ``config.qudt_units`` and ``config.language``), and a
    missing ``namespace`` is derived from the IRI via ``split_namespace``.
    """

    iri: Union[str, AnyUrl] = Field(
        ...,
        description="Ontological IRI related to the measurement unit",
    )
    label: Optional[str] = Field(
        None,
        description="Label of the measurement unit",
    )
    symbol: Optional[str] = Field(
        None,
        description="Symbol of the measurement unit",
    )
    namespace: Optional[str] = Field(
        None,
        description="Namespace of the measurement unit",
    )

    # "after" model validators operate on the validated instance, so this is
    # a plain instance method rather than a classmethod taking (cls, self).
    @model_validator(mode="after")
    def validate_measurement_unit(self) -> "MeasurementUnit":
        """Fill in label, symbol and namespace when they were not provided.

        Returns:
            The validated instance itself.
        """
        # `iri` may be an `AnyUrl`; the string helpers below need a `str`.
        iri = str(self.iri)

        # Only query the QUDT graph when something is actually missing, so
        # fully specified units trigger neither a lookup nor its warnings.
        if not self.label or not self.symbol:
            unit = _get_qudt_label_and_symbol(
                iri, self.config.qudt_units, self.config.language
            )
            if not self.label and "label" in unit:
                self.label = unit["label"]
            if not self.symbol and "symbol" in unit:
                self.symbol = unit["symbol"]

        if not self.namespace:
            self.namespace = split_namespace(iri)
        return self


class QuantityGraph(BasicGraphModel, BasicSuffixModel):
"""Quantity with or without a discrete value and a unit
E.g. a quantity with a single value and unit _or_
Expand All @@ -133,6 +167,14 @@ class QuantityGraph(BasicGraphModel, BasicSuffixModel):
for mapping the data value to the individual.""",
)

measurement_unit: Optional[MeasurementUnit] = Field(
None,
description="Detailed QUDT Measurement Unit specification",
alias=AliasChoices(
"measurement_unit", "measurementunit", "measurementUnit"
),
)

@field_validator("value", mode="after")
@classmethod
def validate_value(
Expand Down Expand Up @@ -174,6 +216,15 @@ def validate_unit(
value = str(value)
return value

# "after" model validators operate on the validated instance, so this is a
# plain instance method rather than a classmethod taking (cls, self).
@model_validator(mode="after")
def validate_quantity_graph(self) -> "QuantityGraph":
    """Keep ``unit`` and ``measurement_unit`` in sync.

    If only ``unit`` is set, a ``MeasurementUnit`` is built from it; if only
    ``measurement_unit`` is set, its IRI is copied to ``unit``. When both or
    neither are set, nothing is changed.

    Returns:
        The validated instance itself.
    """
    if self.measurement_unit is None and self.unit:
        self.measurement_unit = MeasurementUnit(iri=self.unit)
    elif self.measurement_unit is not None and not self.unit:
        self.unit = self.measurement_unit.iri
    return self

@property
def json_ld(cls) -> Dict[str, Any]:
"""Return dict of json-ld for graph"""
Expand Down
45 changes: 35 additions & 10 deletions data2rdf/parsers/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data2RDF base model for parsers"""

import json
import warnings
from abc import abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

Expand All @@ -9,7 +10,7 @@
from data2rdf.config import Config
from data2rdf.modes import PipelineMode

from .utils import load_mapping_file
from .utils import generate_id, load_mapping_file

from pydantic import ( # isort:skip
BaseModel,
Expand Down Expand Up @@ -189,15 +190,39 @@ def time_series(self) -> "pd.DataFrame":
return self._time_series

@property
def plain_metadata(self) -> "Dict[str, Any]":
"""Metadata as flat json - without units and iris.
Useful e.g. for the custom properties of the DSMS."""
return {
str(metadatum.iri).split(self.config.separator)[
-1
]: metadatum.value
for metadatum in self.general_metadata
}
def plain_metadata(self) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Deprecated accessor for the general metadata; use ``to_dict()``.

    Emits a ``DeprecationWarning`` and forwards to ``to_dict`` with
    ``dsms_schema`` taken from ``config.dsms_schema_default``.

    Returns:
        Whatever ``to_dict`` returns for that setting: a list of property
        dicts, or a dict with a ``"sections"`` key when the schema is enabled.
    """
    message = (
        "\n`plain_metadata` is deprecated and will be removed in a future version."
        "\nUse the `to_dict()` instead."
    )
    # stacklevel=2 attributes the warning to the code accessing this member
    # instead of to this line inside the library.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return self.to_dict(dsms_schema=self.config.dsms_schema_default)

def to_dict(
    self, dsms_schema: bool = False
) -> "Union[List[Dict[str, Any]], Dict[str, Any]]":
    """Return the general metadata as plain dictionaries.

    Each metadatum becomes ``{"label": ..., "value": ...}`` where the label
    is the last part of its IRI after splitting on ``config.separator``.
    If the metadatum carries a measurement unit, its model dump (without
    ``config``) is added under ``"measurementUnit"``.

    Args:
        dsms_schema: When true, every entry gets a generated ``"id"`` and
            the entries are wrapped into a single section named "General".

    Returns:
        A list of property dicts, or, when ``dsms_schema`` is true, a dict
        of the form ``{"sections": [{"id", "name", "entries"}]}``.
    """
    entries: List[Dict[str, Any]] = []
    for metadatum in self.general_metadata:
        entry = {
            "label": str(metadatum.iri).split(self.config.separator)[-1],
            "value": metadatum.value,
        }
        # `measurement_unit` is optional: the attribute can exist and still
        # be None, in which case there is nothing to dump.
        unit = getattr(metadatum, "measurement_unit", None)
        if unit is not None:
            entry["measurementUnit"] = unit.model_dump(exclude={"config"})
        entries.append(entry)

    if not dsms_schema:
        return entries

    for entry in entries:
        entry["id"] = generate_id()
    return {
        "sections": [
            {
                "id": generate_id(),
                "name": "General",
                "entries": entries,
            }
        ]
    }


class BaseFileParser(BaseParser):
Expand Down
23 changes: 23 additions & 0 deletions data2rdf/parsers/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Data2RDF parser utilities"""

import json
import random
import string
import time
import warnings
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -216,3 +219,23 @@ def _value_exists(value: "Any") -> bool:
bool: True if the value exists and is valid, otherwise False.
"""
return pd.notnull(value) and value != ""


def generate_id(prefix: str = "id") -> str:
    """
    Build an id from a prefix, the current time and a random suffix.

    The id is the prefix, followed by the current epoch time in whole
    milliseconds, followed by 6 characters drawn at random from lowercase
    ASCII letters and digits.

    Args:
        prefix (str): The prefix to use for the generated id. Defaults to "id".

    Returns:
        str: The generated id.
    """
    alphabet = string.ascii_lowercase + string.digits
    millis = int(time.time() * 1000)  # milliseconds since epoch
    suffix = "".join(random.choices(alphabet, k=6))  # nosec
    return f"{prefix}{millis}{suffix}"
35 changes: 35 additions & 0 deletions data2rdf/qudt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import requests
from rdflib import Graph

from data2rdf.warnings import QUDTMappingWarning


def _qudt_sparql(symbol: str) -> str:
return f"""PREFIX qudt: <http://qudt.org/schema/qudt/>
Expand Down Expand Up @@ -82,3 +84,36 @@ def _check_qudt_mapping(symbol: Optional[str]) -> Optional[str]:
else:
unit = {}
return unit


def _get_qudt_label_and_symbol(
    iri: str, qudt_iri: str, language: str
) -> dict[str, str]:
    """Look up the label and symbol of a unit in the QUDT graph.

    Args:
        iri: IRI of the unit to look up.
        qudt_iri: Location of the QUDT ontology, passed to `_get_qudt_graph`.
        language: Language tag the `rdfs:label` must carry.

    Returns:
        A dict with the keys ``"label"`` and ``"symbol"``, or an empty dict
        when the query yields no result. If several results are found, the
        first one is returned. A `QUDTMappingWarning` is emitted in both the
        empty and the ambiguous case.
    """
    graph = _get_qudt_graph(qudt_iri)
    # Both triple patterns are mandatory, so a unit needs a label in the
    # requested language *and* a symbol to produce a result row.
    gen_query = f"""PREFIX qudt: <http://qudt.org/schema/qudt/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?label ?symbol
    WHERE {{
        <{iri}> rdfs:label ?label .
        <{iri}> qudt:symbol ?symbol .
        FILTER (LANG(?label) = "{language}")
    }}"""
    matches = [
        {"label": str(row["label"]), "symbol": str(row["symbol"])}
        for row in graph.query(gen_query)
    ]
    if not matches:
        warnings.warn(
            f"No QUDT label and symbol found for unit with iri `{iri}`.",
            QUDTMappingWarning,
        )
        return {}
    if len(matches) > 1:
        warnings.warn(
            f"Multiple QUDT symbols and labels found for unit with iri `{iri}`.",
            QUDTMappingWarning,
        )
    return matches[0]
16 changes: 16 additions & 0 deletions data2rdf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,19 @@ def make_prefix(config: "Config") -> str:
else:
prefix = str(config.base_iri)
return prefix


def split_namespace(iri: str) -> str:
    """
    Return the namespace part of the given iri.

    If the iri contains a ``#``, the namespace is everything before the
    first ``#``; otherwise it is everything before the last ``/``. The
    trailing delimiter is not part of the result.

    Args:
        iri: The iri to split. Non-string objects (e.g. URL objects) are
            converted with ``str()`` first.

    Returns:
        The namespace of the iri, or an empty string if the iri contains
        neither ``#`` nor ``/``.
    """
    iri = str(iri)
    if "#" in iri:
        return iri.partition("#")[0]
    return iri.rpartition("/")[0]
4 changes: 4 additions & 0 deletions data2rdf/warnings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ class MappingMissmatchWarning(UserWarning):

class ParserWarning(UserWarning):
    """Warning category for issues raised in the context of a parser."""


class QUDTMappingWarning(UserWarning):
    """Warning category for issues raised in the context of a QUDT mapping."""
15 changes: 15 additions & 0 deletions tests/abox/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ def test_quantity_model(config) -> None:
assert model.graph.isomorphic(expected_graph)
assert str(model.graph.identifier) == config["graph_identifier"]

assert model.measurement_unit.iri == "http://qudt.org/vocab/unit/MilliM"
assert model.measurement_unit.symbol == "mm"
assert model.measurement_unit.label == "Millimetre"
assert model.measurement_unit.namespace == "http://qudt.org/vocab/unit"


@pytest.mark.parametrize("unit", [unit_string, unit_iri])
def test_valued_quantity(unit):
Expand All @@ -61,6 +66,11 @@ def test_valued_quantity(unit):

assert model.graph.isomorphic(expected_graph)

assert model.measurement_unit.iri == "http://qudt.org/vocab/unit/MilliM"
assert model.measurement_unit.symbol == "mm"
assert model.measurement_unit.label == "Millimetre"
assert model.measurement_unit.namespace == "http://qudt.org/vocab/unit"


def test_bad_with_blank_space():
from rdflib import Graph
Expand All @@ -83,3 +93,8 @@ def test_bad_with_blank_space():
)

assert model.graph.isomorphic(expected_graph)

assert model.measurement_unit.iri == "http://qudt.org/vocab/unit/MilliM"
assert model.measurement_unit.symbol == "mm"
assert model.measurement_unit.label == "Millimetre"
assert model.measurement_unit.namespace == "http://qudt.org/vocab/unit"

0 comments on commit d62607f

Please sign in to comment.