Skip to content

Commit

Permalink
Fix/optional title (#63)
Browse files Browse the repository at this point in the history
* add more docstrings

* make ontology title optional, add pytest, remove unneeded parser argument from tbox pytest

* update github ci for pytests, make title and authors optional, update pytest

* update pytests
  • Loading branch information
MBueschelberger authored Aug 20, 2024
1 parent 30f048c commit caa1782
Show file tree
Hide file tree
Showing 17 changed files with 305 additions and 43 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ jobs:
python -m pip freeze
- name: Run tests
run: |
pytest -v
- name: Make coverage
if: ${{ matrix.python-version == '3.12' }}
run: |
pytest --junitxml=pytest.xml --cov=data2rdf | tee pytest-coverage.txt
Expand Down
5 changes: 5 additions & 0 deletions data2rdf/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,9 @@ class Config(BaseSettings):
This will be suppressed if enabled.""",
)

exclude_ontology_title: bool = Field(
False,
description="In TBox mode, exclude the title of the ontology in the graph.",
)

model_config = ConfigDict(extra="ignore")
33 changes: 28 additions & 5 deletions data2rdf/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,19 @@ def graph(cls) -> "Graph":
@model_validator(mode="after")
@classmethod
def run_parser(cls, self: "BaseParser") -> "BaseParser":
"""Run parser"""
"""
Runs the parser for the given data file and mapping.
This function is a class method that takes in a `self` parameter, which is an instance of the `BaseParser` class.
It loads the data file using the `_load_data_file` method and loads the mapping file using the `load_mapping_file` function.
It then runs the parser using the `_run_parser` method and returns the parsed `BaseParser` instance.
Args:
self (BaseParser): The instance of the `BaseParser` class.
Returns:
BaseParser: The parsed `BaseParser` instance.
"""

datafile: Any = cls._load_data_file(self)
mapping: "Dict[str, BaseParser]" = load_mapping_file(
Expand All @@ -125,17 +137,19 @@ class TBoxBaseParser(AnyBoxBaseParser):
)

version_info: Optional[str] = Field(
"1.0.0", description="Version of the ontplpgy"
None, description="Version of the ontology"
)

ontology_iri: Optional[Union[str, AnyUrl]] = Field(
None, description="General IRI of the ontology."
)

ontology_title: str = Field(..., description="Title of the ontology")
ontology_title: Optional[str] = Field(
None, description="Title of the ontology"
)

authors: List[str] = Field(
..., description="Name of the authors contributing to the ontology."
authors: Optional[List[str]] = Field(
None, description="Name of the authors contributing to the ontology."
)

_classes: Any = PrivateAttr()
Expand Down Expand Up @@ -227,6 +241,15 @@ def tbox(self) -> "TBoxBaseParser":
@model_validator(mode="after")
@classmethod
def execute_parser(cls, self: "BaseFileParser") -> "BaseFileParser":
"""
Validates the parser model and executes the parser based on the specified mode.
Args:
self: An instance of the BaseFileParser class.
Returns:
An instance of the BaseFileParser class with the parser executed.
"""
arguments = {
"mapping": self.mapping,
"raw_data": self.raw_data,
Expand Down
49 changes: 46 additions & 3 deletions data2rdf/parsers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,20 @@ def _run_parser(
datafile: StringIO,
mapping: "List[TBoxBaseMapping]",
) -> None:
"""Run excel parser in tbox mode"""
"""
Class method for running the CSVTBoxParser. This method reads a CSV file
into a pandas DataFrame and then uses the provided mapping to create TBox
classes.
Parameters:
self (CSVTBoxParser): The instance of the parser.
datafile (StringIO): The CSV file to be parsed.
mapping (List[TBoxBaseMapping]): The list of mappings to be applied.
Returns:
None
"""

df = pd.read_csv(datafile, sep=self.column_sep)
_make_tbox_classes(self, df, mapping)

Expand Down Expand Up @@ -131,7 +144,23 @@ def mapping_model(cls) -> ABoxBaseMapping:
# OVERRIDE
@property
def json_ld(cls) -> "Dict[str, Any]":
"""Return dict for json-ld for the graph in abox mode"""
"""
Returns a JSON-LD representation of the CSV data in ABox mode.
This method generates a JSON-LD object that describes the CSV data,
including its metadata, time series data, and relationships between them.
The returned JSON-LD object is in the format of a csvw:TableGroup,
which contains one or more csvw:Table objects. Each csvw:Table object
represents a table in the CSV data, and contains information about its
columns, rows, and relationships to other tables.
The JSON-LD object also includes context information, such as namespace
prefixes and base URLs, to help with serialization and deserialization.
Returns:
Dict[str, Any]: A JSON-LD object representing the CSV data in ABox mode.
"""

if not cls.config.suppress_file_description:
tables = []
Expand Down Expand Up @@ -273,7 +302,21 @@ def _run_parser(
mapping: "List[ABoxBaseMapping]",
) -> None:
"""
Parse metadata, time series metadata and time series
This function is responsible for parsing metadata, time series metadata, and time series data from a CSV file.
It takes in three parameters:
- `self`: The CSVParser instance.
- `datafile`: The StringIO object containing the CSV data.
- `mapping`: A list of ABoxBaseMapping instances that map the CSV data to the desired output format.
The function returns None, but it populates the following instance variables:
- `self._general_metadata`: A list of PropertyGraph or QuantityGraph instances representing the general metadata.
- `self._time_series_metadata`: A list of QuantityGraph instances representing the time series metadata.
- `self._time_series`: A pandas DataFrame containing the time series data.
The function also raises ValueError if the `metadata_length` is greater than 0 but `metadata_sep` is not set.
It raises TypeError if the unit for a key is not a string.
It raises MappingMissmatchWarning if no match is found in the mapping for a key.
"""

mapping = {model.key: model for model in mapping}
Expand Down
38 changes: 35 additions & 3 deletions data2rdf/parsers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,23 @@ def _run_parser(
datafile: BytesIO,
mapping: "List[TBoxBaseMapping]",
) -> None:
"""Run excel parser in tbox mode"""
"""
Run excel parser in tbox mode.
Parameters
----------
self : ExcelTBoxParser
The instance of the parser.
datafile : BytesIO
The excel file to be parsed.
mapping : List[TBoxBaseMapping]
The list of mappings to be applied.
Returns
-------
None
This function does not return any value.
"""
df = pd.read_excel(datafile, sheet_name=self.sheet)
_make_tbox_classes(self, df, mapping)

Expand Down Expand Up @@ -122,7 +138,15 @@ def mapping_model(cls) -> ABoxExcelMapping:
# OVERRIDE
@property
def json_ld(cls) -> Dict[str, Any]:
"""Make the json-ld if pipeline is in abox-mode"""
"""
Returns the JSON-LD representation of the data in ABox mode.
The JSON-LD is constructed based on the metadata and time series data.
If the file description is not suppressed, it includes the metadata and time series data tables.
Otherwise, it returns a list of JSON-LD representations of the individual models.
:return: A dictionary representing the JSON-LD data.
"""

if not cls.config.suppress_file_description:
tables = []
Expand Down Expand Up @@ -256,7 +280,15 @@ def _run_parser(
mapping: "List[ABoxExcelMapping]",
) -> None:
"""
Parse metadata, time series metadata and time series
Parses the metadata, time series metadata, and time series from an Excel file.
Args:
self (ExcelABoxParser): The instance of the ExcelABoxParser class.
datafile (BytesIO): The file object containing the Excel file.
mapping (List[ABoxExcelMapping]): The list of mappings to use for parsing.
Returns:
None: This function does not return anything.
"""

mapping = {model.key: model for model in mapping}
Expand Down
48 changes: 45 additions & 3 deletions data2rdf/parsers/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,18 @@ def _run_parser(
datafile: "List[Dict[str, Any]]",
mapping: "Dict[str, TBoxBaseMapping]",
) -> None:
"""Run parser in TBox mode"""
"""
Runs the parser in TBox mode.
Args:
self: An instance of JsonTBoxParser.
datafile: A list of dictionaries containing the data to be parsed.
mapping: A dictionary containing the mapping of the data.
Returns:
None
"""

df = pd.DataFrame(datafile)
_make_tbox_classes(self, df, mapping)

Expand Down Expand Up @@ -98,6 +109,18 @@ def mapping_model(cls) -> ABoxJsonMapping:
# OVERRIDE
@property
def json_ld(cls) -> Dict[str, Any]:
"""
Returns the JSON-LD representation of the parser's data.
This method generates the JSON-LD representation of the parser's data,
including the context, id, type, and members. The members are generated
based on the general metadata and time series metadata.
The method returns a dictionary containing the JSON-LD representation.
:return: A dictionary containing the JSON-LD representation.
:rtype: Dict[str, Any]
"""
if not cls.config.suppress_file_description:
members = []

Expand Down Expand Up @@ -202,6 +225,16 @@ def json_ld(cls) -> Dict[str, Any]:
# OVERRIDE
@classmethod
def _load_data_file(cls, self: "JsonABoxParser") -> "Dict[str, Any]":
"""
Class method for loading data file.
Args:
cls: The class of the parser.
self: An instance of JsonABoxParser.
Returns:
Dict[str, Any]: The loaded data file.
"""
return _load_data_file(self)

# OVERRIDE
Expand All @@ -213,9 +246,18 @@ def _run_parser(
mapping: "List[ABoxJsonMapping]",
) -> None:
"""
Parse metadata, time series metadata and time series
"""
Class method for parsing metadata, time series metadata,
and time series from a given data file and mapping.
Args:
self: An instance of JsonABoxParser.
datafile: A dictionary containing the data to be parsed.
mapping: A list of ABoxJsonMapping objects defining the
mapping from the data to the ABox.
Returns:
None
"""
self._general_metadata = []
self._time_series_metadata = []
self._time_series = {}
Expand Down
47 changes: 32 additions & 15 deletions data2rdf/parsers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,26 +139,43 @@ def _make_tbox_classes(
self._classes.append(subgraph)


def _make_tbox_json_ld(cls: "TBoxBaseParser") -> "Dict[str, Any]":
ontology_iri = cls.ontology_iri or cls.config.base_iri
def _make_tbox_json_ld(model: "TBoxBaseParser") -> "Dict[str, Any]":
ontology_iri = model.ontology_iri or model.config.base_iri
classes = [model.json_ld for model in model.classes]
if not model.config.exclude_ontology_title:
if model.ontology_title:
title = {"dcterms:title": model.ontology_title}
else:
title = {}
if model.authors:
authors = {
"dcterms:creator": [
{"@type": "foaf:Person", "foaf:name": author}
for author in model.authors
]
}
else:
authors = {}
if model.version_info:
version = {"owl:versionInfo": model.version_info}
else:
version = {}

classes += [
{
"@id": str(ontology_iri),
"@type": "owl:Ontology",
**title,
**authors,
**version,
},
]
return {
"@context": {
"owl": "http://www.w3.org/2002/07/owl#",
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"dcterms": "http://purl.org/dc/terms/",
"foaf": "http://xmlns.com/foaf/spec/",
},
"@graph": [model.json_ld for model in cls.classes]
+ [
{
"@id": str(ontology_iri),
"@type": "owl:Ontology",
"dcterms:title": cls.ontology_title,
"owl:versionInfo": cls.version_info,
"dcterms:creator": [
{"@type": "foaf:Person", "foaf:name": author}
for author in cls.authors
],
},
],
"@graph": classes,
}
30 changes: 28 additions & 2 deletions data2rdf/pipelines/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,23 @@ def run_pipeline(cls, self: "Data2RDF") -> "Data2RDF":

@property
def json_ld(cls) -> Dict[str, Any]:
"""Return dict of json-ld for graph"""
"""
Returns a dictionary of JSON-LD for the graph based on the pipeline mode.
If the pipeline mode is ABOX, it returns a dictionary containing the context,
id, type, and distribution information of the dataset. If the
`suppress_file_description` config is False, it also includes the file
description. Otherwise, it returns the JSON-LD of the ABox parser.
If the pipeline mode is TBOX, it returns the JSON-LD of the TBox parser.
Args:
None
Returns:
Dict[str, Any]: A dictionary of JSON-LD for the graph.
"""

if cls.mode == PipelineMode.ABOX:
if not cls.config.suppress_file_description:
model = {
Expand Down Expand Up @@ -177,7 +193,17 @@ def json_ld(cls) -> Dict[str, Any]:

@property
def graph(cls) -> Graph:
"""Return graph object"""
"""
Returns a graph object based on the pipeline's JSON-LD data.
The graph object is created with the identifier specified through the pipeline.
It is then populated with the JSON-LD data from the pipeline, and if additional
triples are provided, they are validated and added to the graph.
Returns:
Graph: A graph object containing the pipeline's data.
"""

graph = Graph(identifier=cls.config.graph_identifier)
graph.parse(data=json.dumps(cls.json_ld), format="json-ld")
if cls.additional_triples:
Expand Down
Loading

0 comments on commit caa1782

Please sign in to comment.