From c9c513edbd03e75dd2891c2bf9b016e48070bf66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20B=C3=BCschelberger?= Date: Fri, 17 Jan 2025 16:16:59 +0100 Subject: [PATCH] rename time_series to dataframe --- data2rdf/models/mapping.py | 2 +- data2rdf/parsers/base.py | 24 ++++----- data2rdf/parsers/csv.py | 48 +++++++++--------- data2rdf/parsers/excel.py | 42 ++++++++------- data2rdf/parsers/json.py | 22 ++++---- data2rdf/pipelines/main.py | 31 ++++++++--- docs/examples/abox/1_csv.md | 20 ++++---- docs/examples/abox/2_excel.md | 22 ++++---- docs/examples/abox/3_json.md | 2 +- docs/examples/abox/4_csv_wo_metadata.md | 14 ++--- docs/examples/abox/5_csv_w_na.md | 14 ++--- docs/examples/abox/6_custom_relations.md | 4 +- docs/examples/tbox/tbox.md | 2 +- examples/1_csv.ipynb | 6 +-- examples/2_excel.ipynb | 16 +++--- examples/3_json.ipynb | 4 +- examples/4_csv_wo_metadata.ipynb | 8 +-- examples/5_csv_w_na.ipynb | 8 +-- tests/abox/csv_empty_rows/test_parser.py | 14 ++--- tests/abox/csv_empty_rows/test_pipeline.py | 16 +++--- tests/abox/csv_pipeline_test/test_parser.py | 46 ++++++++--------- tests/abox/csv_pipeline_test/test_pipeline.py | 28 +++++----- tests/abox/csv_without_header/test_parser.py | 14 ++--- .../abox/csv_without_header/test_pipeline.py | 16 +++--- tests/abox/json_pipeline_test/test_parser.py | 20 ++++---- .../abox/json_pipeline_test/test_pipeline.py | 20 ++++---- .../bad_metadata_tensile_test_mapping.json | 12 ++--- .../bad_timeseries_tensile_test_mapping.json | 12 ++--- .../input/mapping/mapping_suffix.json | 12 ++--- .../input/mapping/tensile_test_mapping.csv | 2 +- .../input/mapping/tensile_test_mapping.json | 12 ++--- .../input/mapping/tensile_test_mapping.xlsx | Bin 7572 -> 9914 bytes tests/abox/xls_pipeline_test/test_parser.py | 40 +++++++-------- tests/abox/xls_pipeline_test/test_pipeline.py | 24 ++++----- 34 files changed, 297 insertions(+), 280 deletions(-) diff --git a/data2rdf/models/mapping.py b/data2rdf/models/mapping.py index 
9fd5d133..878047c2 100644 --- a/data2rdf/models/mapping.py +++ b/data2rdf/models/mapping.py @@ -183,7 +183,7 @@ def validate_model(cls, self: "ABoxBaseMapping") -> "ABoxBaseMapping": class ABoxExcelMapping(ABoxBaseMapping): """A special model for mapping from excel files to semantic concepts in the ABox""" - time_series_start: Optional[str] = Field( + dataframe_start: Optional[str] = Field( None, description="Cell location for the start of the time series quantity", ) diff --git a/data2rdf/parsers/base.py b/data2rdf/parsers/base.py index 861576e8..498529ff 100644 --- a/data2rdf/parsers/base.py +++ b/data2rdf/parsers/base.py @@ -171,8 +171,8 @@ class ABoxBaseParser(AnyBoxBaseParser): """Basic Parser for ABox mode""" _general_metadata: Any = PrivateAttr() - _time_series_metadata: Any = PrivateAttr() - _time_series: Any = PrivateAttr() + _dataframe_metadata: Any = PrivateAttr() + _dataframe: Any = PrivateAttr() @property def general_metadata(self) -> "List[BasicConceptMapping]": @@ -180,14 +180,14 @@ def general_metadata(self) -> "List[BasicConceptMapping]": return self._general_metadata @property - def time_series_metadata(self) -> "List[BasicConceptMapping]": + def dataframe_metadata(self) -> "List[BasicConceptMapping]": """Return list object with general metadata""" - return self._time_series_metadata + return self._dataframe_metadata @property - def time_series(self) -> "pd.DataFrame": + def dataframe(self) -> "pd.DataFrame": """Return times series found in the data as pd.DataFrame""" - return self._time_series + return self._dataframe @property def plain_metadata(self) -> List[Dict[str, Any]]: @@ -343,23 +343,23 @@ def general_metadata(cls) -> "List[BasicConceptMapping]": ) @property - def time_series_metadata(cls) -> "List[BasicConceptMapping]": + def dataframe_metadata(cls) -> "List[BasicConceptMapping]": """Return time series metadata""" if cls.mode == PipelineMode.ABOX: - return cls.abox.time_series_metadata + return cls.abox.dataframe_metadata else: raise 
NotImplementedError( - "`time_series_metadata` is not available in `tbox`-mode." + "`dataframe_metadata` is not available in `tbox`-mode." ) @property - def time_series(cls) -> "Dict[str, Any]": + def dataframe(cls) -> "Dict[str, Any]": """Return time series""" if cls.mode == PipelineMode.ABOX: - return cls.abox.time_series + return cls.abox.dataframe else: raise NotImplementedError( - "`time_series` is not available in `tbox`-mode." + "`dataframe` is not available in `tbox`-mode." ) @property diff --git a/data2rdf/parsers/csv.py b/data2rdf/parsers/csv.py index 58fc33a2..08b65b04 100644 --- a/data2rdf/parsers/csv.py +++ b/data2rdf/parsers/csv.py @@ -119,10 +119,10 @@ class CSVABoxParser(ABoxBaseParser): None, description="Metadata column separator" ) metadata_length: int = Field(..., description="Length of the metadata") - time_series_sep: Optional[str] = Field( + dataframe_sep: Optional[str] = Field( None, description="Column separator of the time series header" ) - time_series_header_length: int = Field( + dataframe_header_length: int = Field( 2, description="Length of header of the time series" ) fillna: Optional[Any] = Field( @@ -207,7 +207,7 @@ def json_ld(cls) -> "Dict[str, Any]": ) tables += [meta_table] - if cls.time_series_metadata: + if cls.dataframe_metadata: column_schema = {"@type": "csvw:Schema", "csvw:column": []} tables += [ { @@ -216,7 +216,7 @@ def json_ld(cls) -> "Dict[str, Any]": "csvw:tableSchema": column_schema, } ] - for idx, mapping in enumerate(cls.time_series_metadata): + for idx, mapping in enumerate(cls.dataframe_metadata): if isinstance(mapping, QuantityGraph): entity = {"qudt:quantity": mapping.json_ld} elif isinstance(mapping, PropertyGraph): @@ -289,7 +289,7 @@ def json_ld(cls) -> "Dict[str, Any]": else: json_ld = { "@graph": [model.json_ld for model in cls.general_metadata] - + [model.json_ld for model in cls.time_series_metadata] + + [model.json_ld for model in cls.dataframe_metadata] } return json_ld @@ -311,8 +311,8 @@ def 
_run_parser( The function returns None, but it populates the following instance variables: - `self._general_metadata`: A list of PropertyGraph or QuantityGraph instances representing the general metadata. - - `self._time_series_metadata`: A list of QuantityGraph instances representing the time series metadata. - - `self._time_series`: A pandas DataFrame containing the time series data. + - `self._dataframe_metadata`: A list of QuantityGraph instances representing the time series metadata. + - `self._dataframe`: A pandas DataFrame containing the time series data. The function also raises ValueError if the `metadata_length` is greater than 0 but `metadata_sep` is not set. It raises TypeError if the unit for a key is not a string. @@ -327,11 +327,11 @@ def _run_parser( mapping = {model.key: model for model in mapping} - time_series: Union[pd.DataFrame, List[None]] = cls._parse_time_series( + dataframe: Union[pd.DataFrame, List[None]] = cls._parse_dataframe( self, datafile ) if self.dropna: - time_series.dropna(inplace=True) + dataframe.dropna(inplace=True) datafile.seek(0) # iterate over general metadata @@ -399,10 +399,10 @@ def _run_parser( ) # parse time series data and meta data - self._time_series_metadata = [] - self._time_series = {} + self._dataframe_metadata = [] + self._dataframe = {} - for key in time_series: + for key in dataframe: # get matching mapping mapping_match = mapping.get(key) @@ -411,8 +411,8 @@ def _run_parser( unit = ( mapping_match.unit or ( - time_series[key].iloc[0] - if self.time_series_header_length == 2 + dataframe[key].iloc[0] + if self.dataframe_header_length == 2 else None ) or None @@ -439,11 +439,11 @@ def _run_parser( model.unit_relation = mapping_match.unit_relation # append model - self.time_series_metadata.append(model) + self.dataframe_metadata.append(model) # assign time series data - self._time_series[model.suffix] = time_series[key][ - self.time_series_header_length - 1 : + self._dataframe[model.suffix] = dataframe[key][ + 
self.dataframe_header_length - 1 : ].to_list() else: @@ -452,12 +452,12 @@ def _run_parser( MappingMissmatchWarning, ) # set time series as pd dataframe - self._time_series = pd.DataFrame.from_dict( - self._time_series, orient="index" + self._dataframe = pd.DataFrame.from_dict( + self._dataframe, orient="index" ).transpose() # check if drop na: if self.dropna: - self._time_series.dropna(how="all", inplace=True) + self._dataframe.dropna(how="all", inplace=True) # OVERRIDE @classmethod @@ -466,14 +466,14 @@ def _load_data_file(cls, self: "CSVABoxParser") -> StringIO: return _load_data_file(self) @classmethod - def _parse_time_series( + def _parse_dataframe( cls, self: "CSVParser", datafile: "StringIO" ) -> Union[pd.DataFrame, List[None]]: - if self.time_series_sep: + if self.dataframe_sep: response = pd.read_csv( datafile, encoding=self.config.encoding, - sep=self.time_series_sep, + sep=self.dataframe_sep, skiprows=self.metadata_length, ) response = response.map( @@ -485,7 +485,7 @@ def _parse_time_series( ] else: warnings.warn( - "`time_series_sep` is not set. Any potential time series in the data file will be skipped.", + "`dataframe_sep` is not set. 
Any potential time series in the data file will be skipped.", ParserWarning, ) response = [] diff --git a/data2rdf/parsers/excel.py b/data2rdf/parsers/excel.py index 8e211550..57fc5699 100644 --- a/data2rdf/parsers/excel.py +++ b/data2rdf/parsers/excel.py @@ -189,7 +189,7 @@ def json_ld(cls) -> Dict[str, Any]: ) tables += [meta_table] - if cls.time_series_metadata: + if cls.dataframe_metadata: column_schema = {"@type": "csvw:Schema", "csvw:column": []} tables += [ { @@ -198,7 +198,7 @@ def json_ld(cls) -> Dict[str, Any]: "csvw:tableSchema": column_schema, } ] - for idx, mapping in enumerate(cls.time_series_metadata): + for idx, mapping in enumerate(cls.dataframe_metadata): if isinstance(mapping, QuantityGraph): entity = {"qudt:quantity": mapping.json_ld} elif isinstance(mapping, PropertyGraph): @@ -271,7 +271,7 @@ def json_ld(cls) -> Dict[str, Any]: else: json_ld = { "@graph": [model.json_ld for model in cls.general_metadata] - + [model.json_ld for model in cls.time_series_metadata] + + [model.json_ld for model in cls.dataframe_metadata] } return json_ld @@ -301,8 +301,8 @@ def _run_parser( datafile.seek(0) self._general_metadata = [] - self._time_series_metadata = [] - self._time_series = {} + self._dataframe_metadata = [] + self._dataframe = {} for datum in mapping: worksheet = workbook[datum.worksheet] @@ -320,28 +320,26 @@ def _run_parser( suffix = quote(suffix) if not datum.custom_relations: - if datum.value_location and datum.time_series_start: + if datum.value_location and datum.dataframe_start: raise RuntimeError( - """Both, `value_location` and `time_series_start + """Both, `value_location` and `dataframe_start are set. 
Only one of them must be set.""" ) # find data for time series - if datum.time_series_start: - column_name = datum.time_series_start.rstrip("0123456789") - time_series_end = f"{column_name}{worksheet.max_row}" + if datum.dataframe_start: + column_name = datum.dataframe_start.rstrip("0123456789") + dataframe_end = f"{column_name}{worksheet.max_row}" - column = worksheet[ - datum.time_series_start : time_series_end - ] + column = worksheet[datum.dataframe_start : dataframe_end] if column: - self._time_series[suffix] = [ + self._dataframe[suffix] = [ cell[0].value for cell in column ] else: message = f"""Concept with key `{datum.key}` - does not have a time series from `{datum.time_series_start}` - until `{time_series_end}` . + does not have a time series from `{datum.dataframe_start}` + until `{dataframe_end}` . Concept will be omitted in graph. """ warnings.warn(message, MappingMissmatchWarning) @@ -391,7 +389,7 @@ def _run_parser( "config": self.config, } - if datum.value_location and not datum.time_series_start: + if datum.value_location and not datum.dataframe_start: value = worksheet[datum.value_location].value if model_data.get("unit") and _value_exists(value): @@ -409,7 +407,7 @@ def _run_parser( value_exists = _value_exists(value) - if value_exists or suffix in self.time_series: + if value_exists or suffix in self.dataframe: if datum.value_relation: model_data["value_relation"] = datum.value_relation if model_data.get("unit"): @@ -426,7 +424,7 @@ def _run_parser( if value_exists: self._general_metadata.append(model) else: - self._time_series_metadata.append(model) + self._dataframe_metadata.append(model) else: for relation in datum.custom_relations: @@ -479,12 +477,12 @@ def _run_parser( warnings.warn(message, MappingMissmatchWarning) # set time series as pd dataframe - self._time_series = pd.DataFrame.from_dict( - self._time_series, orient="index" + self._dataframe = pd.DataFrame.from_dict( + self._dataframe, orient="index" ).transpose() # check if drop na: 
if self.dropna: - self._time_series.dropna(how="all", inplace=True) + self._dataframe.dropna(how="all", inplace=True) # OVERRIDE @classmethod diff --git a/data2rdf/parsers/json.py b/data2rdf/parsers/json.py index cfd3665a..9727ddb3 100644 --- a/data2rdf/parsers/json.py +++ b/data2rdf/parsers/json.py @@ -196,7 +196,7 @@ def json_ld(cls) -> Dict[str, Any]: f"Mapping must be of type {QuantityGraph} or {PropertyGraph}, not {type(mapping)}" ) - for idx, mapping in enumerate(cls.time_series_metadata): + for idx, mapping in enumerate(cls.dataframe_metadata): if not isinstance(mapping, QuantityGraph): raise TypeError( f"Mapping must be of type {QuantityGraph}, not {type(mapping)}" @@ -242,7 +242,7 @@ def json_ld(cls) -> Dict[str, Any]: else: triples = { "@graph": [model.json_ld for model in cls.general_metadata] - + [model.json_ld for model in cls.time_series_metadata] + + [model.json_ld for model in cls.dataframe_metadata] } return triples @@ -284,8 +284,8 @@ def _run_parser( None """ self._general_metadata = [] - self._time_series_metadata = [] - self._time_series = {} + self._dataframe_metadata = [] + self._dataframe = {} for datum in mapping: subdataset = self._get_optional_subdataset(datafile, datum) @@ -371,8 +371,8 @@ def _run_parser( model_data["unit_relation"] = datum.unit_relation model = QuantityGraph(**model_data) - self._time_series[suffix] = value - self._time_series_metadata.append(model) + self._dataframe[suffix] = value + self._dataframe_metadata.append(model) # if we have a series in the form of a list and a unit and we are expanding: # * iterate over the series # * make a QuantityGraph with the unit and each iterated value @@ -399,8 +399,8 @@ def _run_parser( value_relation_type=datum.value_relation_type, **model_data, ) - self._time_series[suffix] = value - self._time_series_metadata.append(model) + self._dataframe[suffix] = value + self._dataframe_metadata.append(model) # if we have a series in the form of a list and *no* unit and we are expanding: # * 
iterate over the series # * make a PropertyGraph with each iterated value @@ -467,12 +467,12 @@ def _run_parser( ) # set time series as pd dataframe - self._time_series = pd.DataFrame.from_dict( - self._time_series, orient="index" + self._dataframe = pd.DataFrame.from_dict( + self._dataframe, orient="index" ).transpose() # check if drop na: if self.dropna: - self._time_series.dropna(how="all", inplace=True) + self._dataframe.dropna(how="all", inplace=True) def _get_optional_subdataset( self, datafile: Any, datum: ABoxBaseMapping diff --git a/data2rdf/pipelines/main.py b/data2rdf/pipelines/main.py index f3109100..30fe19b6 100644 --- a/data2rdf/pipelines/main.py +++ b/data2rdf/pipelines/main.py @@ -1,6 +1,7 @@ """Data2RDF ABox pipeline""" import json +import warnings from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union @@ -241,21 +242,39 @@ def general_metadata(cls) -> "List[BasicConceptMapping]": ) @property - def time_series_metadata(cls) -> "List[BasicConceptMapping]": + def dataframe_metadata(cls) -> "List[BasicConceptMapping]": """Return list object with time series metadata""" if cls.mode == PipelineMode.ABOX: - return cls.parser.abox.time_series_metadata + return cls.parser.abox.dataframe_metadata else: raise NotImplementedError( - "`time_series_metadata` is not available in `tbox`-mode." + "`dataframe_metadata` is not available in `tbox`-mode." ) @property - def time_series(cls) -> "Dict[str, Any]": + def dataframe(cls) -> "Dict[str, Any]": """Return time series""" if cls.mode == PipelineMode.ABOX: - return cls.parser.abox.time_series + return cls.parser.abox.dataframe else: raise NotImplementedError( - "`time_series` is not available in `tbox`-mode." + "`dataframe` is not available in `tbox`-mode." ) + + @property + def time_series(self) -> "Dict[str, Any]": + warnings.warn( + "`time_series` is deprecated and will be removed in a future version. 
" + "Use `dataframe` instead.", + DeprecationWarning, + ) + return self.dataframe + + @property + def time_series_metadata(self) -> "List[BasicConceptMapping]": + warnings.warn( + "`time_series_metadata` is deprecated and will be removed in a future version. " + "Use `dataframe_metadata` instead.", + DeprecationWarning, + ) + return self.dataframe_metadata diff --git a/docs/examples/abox/1_csv.md b/docs/examples/abox/1_csv.md index 4130da13..b758e93c 100644 --- a/docs/examples/abox/1_csv.md +++ b/docs/examples/abox/1_csv.md @@ -49,7 +49,7 @@ Since we are assuming to have a csv file, we can assume the following parser arg * `"metadata_sep"`: The separator of the metadata In this example, we assume that the metadata is tab-separated. Hence the argument is `"\t"`. -* `"time_series_sep"`: The separator of the time series +* `"dataframe_sep"`: The separator of the time series In this example, we assume that the time series is tab-separated. Hence the argument is `"\t"`. * `"metadata_length"`: The length of the metadata In this example, we assume that the metadata has 22 rows. @@ -65,7 +65,7 @@ Since we are assuming to have a csv file, we can assume the following parser arg "Temperatur" 22 "°C" "Bemerkung" "" ``` -* `time_series_header_length`: The length of the header of the time series. +* `dataframe_header_length`: The length of the header of the time series. 
In this example, we assume that the time series has 2 rows, which is the name of the concept in the first row and the corresponding unit in the second row: ``` "Standardweg" "Breitenänderung" "Dehnung" @@ -80,7 +80,7 @@ The according parser args hence will look like this: ``` parser_args = { "metadata_sep":"\t", - "time_series_sep":"\t", + "dataframe_sep":"\t", "metadata_length":20 } ``` @@ -429,7 +429,7 @@ from data2rdf import Data2RDF, Parser parser_args = { "metadata_sep":"\t", - "time_series_sep":"\t", + "dataframe_sep":"\t", "metadata_length":20 } @@ -527,8 +527,8 @@ The pipeline will deliver you the following outputs: * `graph`: the generated RDF graph * `plain_metadata`: the plain values of the metadata of the experiment -* `time_series`: the plain time series of the experiment -* `time_series_metadata`: the metadata of the time series +* `dataframe`: the plain time series of the experiment +* `dataframe_metadata`: the metadata of the time series ### The RDF graph @@ -1450,10 +1450,10 @@ print({obj.suffix: obj.value for obj in pipeline.general_metadata}) ### The time series metadata -In case of the need of further processing the time series metadata (header of the time series) resulting from the pipeline after parsing, the `time_series_metadata` property can be accessed as follows: +In case of the need of further processing the time series metadata (header of the time series) resulting from the pipeline after parsing, the `dataframe_metadata` property can be accessed as follows: ``` -print(pipeline.time_series_metadata) +print(pipeline.dataframe_metadata) ``` The result should look like this: @@ -1514,10 +1514,10 @@ The result is a list of `QuantityGraph` which (or `PropertyGraph` in case of non ### The time series data -In case of the need of further processing the time series data (tabular data) resulting from the pipeline after parsing, the `time_series` property can be accessed as follows: +In case of the need of further processing the time series data 
(tabular data) resulting from the pipeline after parsing, the `dataframe` property can be accessed as follows: ``` -print(pipeline.time_series) +print(pipeline.dataframe) ``` The result is a pandas dataframe and should look like this: diff --git a/docs/examples/abox/2_excel.md b/docs/examples/abox/2_excel.md index d9595f0e..c2724815 100644 --- a/docs/examples/abox/2_excel.md +++ b/docs/examples/abox/2_excel.md @@ -43,7 +43,7 @@ The original file can be accessed [here](https://github.com/MI-FraunhoferIWM/dat ### The mapping -In contrast to the previous CSV example, we have to provide more information about the location of the data in the excel file. Previously, we simply had to provide the `key` of the concept in the data file. But since we are using `openpyxl`, we need to provide the `worksheet`, `value_location` (in case of metadata), `time_series_start` (in case of time series) and `unit_location` (in case of quantitative data) for each concept in the excel file. +In contrast to the previous CSV example, we have to provide more information about the location of the data in the excel file. Previously, we simply had to provide the `key` of the concept in the data file. But since we are using `openpyxl`, we need to provide the `worksheet`, `value_location` (in case of metadata), `dataframe_start` (in case of time series) and `unit_location` (in case of quantitative data) for each concept in the excel file. 
A valid mapping for the example file show above may look like this: @@ -62,7 +62,7 @@ A valid mapping for the example file show above may look like this: { "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", "key": "Breiten\u00e4nderung", - "time_series_start": "E15", + "dataframe_start": "E15", "unit_location": "E14", "worksheet": "Messdaten" }, @@ -75,7 +75,7 @@ A valid mapping for the example file show above may look like this: { "iri": "https://w3id.org/steel/ProcessOntology/Elongation", "key": "Dehnung", - "time_series_start": "Q15", + "dataframe_start": "Q15", "unit": "\u00f7", "worksheet": "Messdaten" }, @@ -145,21 +145,21 @@ A valid mapping for the example file show above may look like this: { "iri": "https://w3id.org/steel/ProcessOntology/StandardForce", "key": "Standardkraft", - "time_series_start": "C15", + "dataframe_start": "C15", "unit_location": "C14", "worksheet": "Messdaten" }, { "iri": "https://w3id.org/steel/ProcessOntology/Extension", "key": "Standardweg", - "time_series_start": "D15", + "dataframe_start": "D15", "unit_location": "D14", "worksheet": "Messdaten" }, { "iri": "https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel", "key": "Traversenweg absolut", - "time_series_start": "B15", + "dataframe_start": "B15", "unit_location": "B14", "worksheet": "Messdaten" }, @@ -173,7 +173,7 @@ A valid mapping for the example file show above may look like this: { "iri": "https://w3id.org/steel/ProcessOntology/TestTime", "key": "Zeit", - "time_series_start": "A15", + "dataframe_start": "A15", "unit_location": "A14", "worksheet": "Messdaten" } @@ -199,7 +199,7 @@ Whereas the mapping of a time series looks like this: { "iri": "https://w3id.org/steel/ProcessOntology/TestTime", "key": "Zeit", -"time_series_start": "A15", +"dataframe_start": "A15", "unit_location": "A14", "worksheet": "Messdaten" } @@ -214,7 +214,7 @@ In case if the unit cannot be parsed from the excel sheet, we again have the opp { "iri": 
"https://w3id.org/steel/ProcessOntology/Elongation", "key": "Dehnung", - "time_series_start": "Q15", + "dataframe_start": "Q15", "unit": "\u00f7", "worksheet": "Messdaten" } @@ -380,7 +380,7 @@ mapping = [ { "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", "key": "Breiten\u00e4nderung", - "time_series_start": "E15", + "dataframe_start": "E15", "unit_location": "E14", "worksheet": "Messdaten" }, @@ -690,4 +690,4 @@ fileid:TensileTestSpecimen a prov:Agent, -Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `time_series_metadata` and `time_series` attributes in the same way as stated in the [first example](1_csv). +Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `dataframe_metadata` and `dataframe` attributes in the same way as stated in the [first example](1_csv). diff --git a/docs/examples/abox/3_json.md b/docs/examples/abox/3_json.md index da25d918..4cf7100f 100644 --- a/docs/examples/abox/3_json.md +++ b/docs/examples/abox/3_json.md @@ -291,4 +291,4 @@ fileid:WidthChange a ; -Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `time_series_metadata` and `time_series` attributes in the same way as stated in the [first example](1_csv). +Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `dataframe_metadata` and `dataframe` attributes in the same way as stated in the [first example](1_csv). diff --git a/docs/examples/abox/4_csv_wo_metadata.md b/docs/examples/abox/4_csv_wo_metadata.md index f77ae75c..ff6dc1c5 100644 --- a/docs/examples/abox/4_csv_wo_metadata.md +++ b/docs/examples/abox/4_csv_wo_metadata.md @@ -36,17 +36,17 @@ You may note that the first column is the time and the rest of the columns are o Since we are considering the csv parser again, we need to take the following parser arguments into account: -* `time_series_sep`: the separator for the time series. In this case, it is a `,`. 
+* `dataframe_sep`: the separator for the time series. In this case, it is a `,`. * `metadata_length`: the length of the metadata in the csv file. In this case, it is 0, since we do not have any metadata. -* `time_series_header_length`: the length of the header of the time series in the csv file. In this case, it is 1, since the time series start at the second row. +* `dataframe_header_length`: the length of the header of the time series in the csv file. In this case, it is 1, since the time series start at the second row. The resulting Python dictionary for the parser arguments would look like this: ``` parser_args = { - "time_series_sep": ",", + "dataframe_sep": ",", "metadata_length": 0, - "time_series_header_length": 1 + "dataframe_header_length": 1 } ``` @@ -121,9 +121,9 @@ mapping = [ ] parser_args = { - "time_series_sep": ",", + "dataframe_sep": ",", "metadata_length": 0, - "time_series_header_length": 1 + "dataframe_header_length": 1 } data2rdf = Data2RDF( @@ -204,4 +204,4 @@ fileid:tableGroup a csvw:TableGroup ; -Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `time_series_metadata` and `time_series` attributes in the same way as stated in the [first example](1_csv). +Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `dataframe_metadata` and `dataframe` attributes in the same way as stated in the [first example](1_csv). diff --git a/docs/examples/abox/5_csv_w_na.md b/docs/examples/abox/5_csv_w_na.md index 6434b83e..a84e406e 100644 --- a/docs/examples/abox/5_csv_w_na.md +++ b/docs/examples/abox/5_csv_w_na.md @@ -65,17 +65,17 @@ Additionally, there are some missing values, which are marked with `;;` in the c According to the condition of the csv parser, we need to take the following parser arguments into account: -* `time_series_sep`: the separator for the time series. In this case, it is a `;`. +* `dataframe_sep`: the separator for the time series. In this case, it is a `;`. 
* `metadata_length`: the length of the metadata in the csv file. In this case, it is 0, since we do not have any metadata. -* `time_series_header_length`: the length of the header of the time series in the csv file. In this case, it is 1, since the time series start at the second row. +* `dataframe_header_length`: the length of the header of the time series in the csv file. In this case, it is 1, since the time series start at the second row. * `drop_na`: whether to drop the rows with missing values. In this case, it is `False`. The according Python dict for the parser arguments would look like this: ``` parser_args = { - "time_series_sep": ";", + "dataframe_sep": ";", "metadata_length": 0, - "time_series_header_length": 1, + "dataframe_header_length": 1, "drop_na": False } ``` @@ -206,9 +206,9 @@ mapping = [ ] parser_args = { - "time_series_sep": ";", + "dataframe_sep": ";", "metadata_length": 0, - "time_series_header_length": 1, + "dataframe_header_length": 1, "drop_na": False } @@ -327,4 +327,4 @@ fileid:tableGroup a csvw:TableGroup ; -Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `time_series_metadata` and `time_series` attributes in the same way as stated in the [first example](1_csv). +Again, you will be able to investigate the `general_metadata`, `plain_metadata`, `dataframe_metadata` and `dataframe` attributes in the same way as stated in the [first example](1_csv). diff --git a/docs/examples/abox/6_custom_relations.md b/docs/examples/abox/6_custom_relations.md index 9c4004af..669fe856 100644 --- a/docs/examples/abox/6_custom_relations.md +++ b/docs/examples/abox/6_custom_relations.md @@ -83,7 +83,7 @@ Once the `source` field is set with the according wildcard, we are assuming that ```{warning} -Once you use the `custom_relations` field, the `value_location`, `time_series_start`, and `unit_location` fields will be ignored. 
+Once you use the `custom_relations` field, the `value_location`, `dataframe_start`, and `unit_location` fields will be ignored. ``` ### Additional triples @@ -209,4 +209,4 @@ nanoindentation:Jane a chameo:Operator ; ``` -Again, you will be able to investigate the `general_metadata` and `plain_metadata` in the same way as stated in the [first example](1_csv). But this does take place for the `time_series_metadata` and `time_series` attributes, since we do not include any time series in this example here. +Again, you will be able to investigate the `general_metadata` and `plain_metadata` in the same way as stated in the [first example](1_csv). But this does not take place for the `dataframe_metadata` and `dataframe` attributes, since we do not include any time series in this example here. diff --git a/docs/examples/tbox/tbox.md b/docs/examples/tbox/tbox.md index f8ad9d0f..5f0ab4fe 100644 --- a/docs/examples/tbox/tbox.md +++ b/docs/examples/tbox/tbox.md @@ -244,4 +244,4 @@ When the pipeline run is succeded, you see the following output by running `prin ns1:hasTypicalUnitLabel "GPa"^^xsd:string . ``` -In this case, there will be **no** `general_metadata`, `plain_metadata`, `time_series` or `time_series_metadata` attributes, since those outputs do not apply in the for the tbox mode of the pipeline. +In this case, there will be **no** `general_metadata`, `plain_metadata`, `dataframe` or `dataframe_metadata` attributes, since those outputs do not apply in the tbox mode of the pipeline. 
diff --git a/examples/1_csv.ipynb b/examples/1_csv.ipynb index 51c9c7c0..442334db 100644 --- a/examples/1_csv.ipynb +++ b/examples/1_csv.ipynb @@ -5965,7 +5965,7 @@ "source": [ "parser_args = {\n", " \"metadata_sep\":\"\\t\",\n", - " \"time_series_sep\":\"\\t\",\n", + " \"dataframe_sep\":\"\\t\",\n", " \"metadata_length\":20\n", " }" ] @@ -6736,7 +6736,7 @@ } ], "source": [ - "pipeline.time_series_metadata" + "pipeline.dataframe_metadata" ] }, { @@ -6773,7 +6773,7 @@ } ], "source": [ - "print(pipeline.time_series)" + "print(pipeline.dataframe)" ] } ], diff --git a/examples/2_excel.ipynb b/examples/2_excel.ipynb index 43ae313b..aa429c3a 100644 --- a/examples/2_excel.ipynb +++ b/examples/2_excel.ipynb @@ -75,7 +75,7 @@ " {\n", " \"iri\": \"https://w3id.org/steel/ProcessOntology/WidthChange\",\n", " \"key\": \"Breiten\\u00e4nderung\",\n", - " \"time_series_start\": \"E15\",\n", + " \"dataframe_start\": \"E15\",\n", " \"unit_location\": \"E14\",\n", " \"worksheet\": \"Messdaten\"\n", " },\n", @@ -88,7 +88,7 @@ " {\n", " \"iri\": \"https://w3id.org/steel/ProcessOntology/PercentageElongation\",\n", " \"key\": \"Dehnung\",\n", - " \"time_series_start\": \"Q15\",\n", + " \"dataframe_start\": \"Q15\",\n", " \"unit\": \"\\u00f7\",\n", " \"worksheet\": \"Messdaten\"\n", " },\n", @@ -158,21 +158,21 @@ " {\n", " \"iri\": \"https://w3id.org/steel/ProcessOntology/StandardForce\",\n", " \"key\": \"Standardkraft\",\n", - " \"time_series_start\": \"C15\",\n", + " \"dataframe_start\": \"C15\",\n", " \"unit_location\": \"C14\",\n", " \"worksheet\": \"Messdaten\"\n", " },\n", " {\n", " \"iri\": \"https://w3id.org/steel/ProcessOntology/Extension\",\n", " \"key\": \"Standardweg\",\n", - " \"time_series_start\": \"D15\",\n", + " \"dataframe_start\": \"D15\",\n", " \"unit_location\": \"D14\",\n", " \"worksheet\": \"Messdaten\"\n", " },\n", " {\n", " \"iri\": \"https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel\",\n", " \"key\": \"Traversenweg absolut\",\n", - " 
\"time_series_start\": \"B15\",\n", + " \"dataframe_start\": \"B15\",\n", " \"unit_location\": \"B14\",\n", " \"worksheet\": \"Messdaten\"\n", " },\n", @@ -186,7 +186,7 @@ " {\n", " \"iri\": \"https://w3id.org/steel/ProcessOntology/TestTime\",\n", " \"key\": \"Zeit\",\n", - " \"time_series_start\": \"A15\",\n", + " \"dataframe_start\": \"A15\",\n", " \"unit_location\": \"A14\",\n", " \"worksheet\": \"Messdaten\"\n", " }\n", @@ -852,7 +852,7 @@ } ], "source": [ - "pipeline.time_series_metadata" + "pipeline.dataframe_metadata" ] }, { @@ -902,7 +902,7 @@ } ], "source": [ - "print(pipeline.time_series)" + "print(pipeline.dataframe)" ] } ], diff --git a/examples/3_json.ipynb b/examples/3_json.ipynb index cab35395..ca1753ab 100644 --- a/examples/3_json.ipynb +++ b/examples/3_json.ipynb @@ -342,7 +342,7 @@ } ], "source": [ - "pipeline.time_series_metadata" + "pipeline.dataframe_metadata" ] }, { @@ -369,7 +369,7 @@ } ], "source": [ - "print(pipeline.time_series)" + "print(pipeline.dataframe)" ] } ], diff --git a/examples/4_csv_wo_metadata.ipynb b/examples/4_csv_wo_metadata.ipynb index be3166e3..626ef10c 100644 --- a/examples/4_csv_wo_metadata.ipynb +++ b/examples/4_csv_wo_metadata.ipynb @@ -127,9 +127,9 @@ "outputs": [], "source": [ "parser_args = {\n", - " \"time_series_sep\": \",\",\n", + " \"dataframe_sep\": \",\",\n", " \"metadata_length\": 0,\n", - " \"time_series_header_length\": 1\n", + " \"dataframe_header_length\": 1\n", "}" ] }, @@ -289,7 +289,7 @@ } ], "source": [ - "pipeline.time_series_metadata" + "pipeline.dataframe_metadata" ] }, { @@ -317,7 +317,7 @@ } ], "source": [ - "print(pipeline.time_series)" + "print(pipeline.dataframe)" ] } ], diff --git a/examples/5_csv_w_na.ipynb b/examples/5_csv_w_na.ipynb index 2c5342a7..bc3e9486 100644 --- a/examples/5_csv_w_na.ipynb +++ b/examples/5_csv_w_na.ipynb @@ -170,9 +170,9 @@ "outputs": [], "source": [ "parser_args = {\n", - " \"time_series_sep\": \";\",\n", + " \"dataframe_sep\": \";\",\n", " \"metadata_length\": 
0,\n", - " \"time_series_header_length\": 1,\n", + " \"dataframe_header_length\": 1,\n", " \"drop_na\": False\n", "}" ] @@ -391,7 +391,7 @@ } ], "source": [ - "pipeline.time_series_metadata" + "pipeline.dataframe_metadata" ] }, { @@ -479,7 +479,7 @@ } ], "source": [ - "print(pipeline.time_series)" + "print(pipeline.dataframe)" ] } ], diff --git a/tests/abox/csv_empty_rows/test_parser.py b/tests/abox/csv_empty_rows/test_parser.py index ed2f5279..f52d9077 100644 --- a/tests/abox/csv_empty_rows/test_parser.py +++ b/tests/abox/csv_empty_rows/test_parser.py @@ -13,9 +13,9 @@ expected = os.path.join(output_folder, "output_csv_parser.ttl") parser_args = { - "time_series_sep": ";", + "dataframe_sep": ";", "metadata_length": 0, - "time_series_header_length": 1, + "dataframe_header_length": 1, "drop_na": False, } @@ -50,14 +50,14 @@ def test_csv_nan_vals() -> None: assert len(parser.general_metadata) == 0 - assert len(parser.time_series_metadata) == 7 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 7 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 7 - assert sorted(list(parser.time_series.columns)) == sorted(columns) + assert len(parser.dataframe.columns) == 7 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + for name, column in parser.dataframe.items(): assert len(column) == 31 assert parser.graph.isomorphic(expected_graph) diff --git a/tests/abox/csv_empty_rows/test_pipeline.py b/tests/abox/csv_empty_rows/test_pipeline.py index 5a9be28a..f71c9f4e 100644 --- a/tests/abox/csv_empty_rows/test_pipeline.py +++ b/tests/abox/csv_empty_rows/test_pipeline.py @@ -13,9 +13,9 @@ expected = os.path.join(output_folder, "output_csv_pipeline.ttl") parser_args = { - "time_series_sep": ";", + "dataframe_sep": ";", "metadata_length": 0, - "time_series_header_length": 1, + "dataframe_header_length": 1, "drop_na": 
False, } @@ -54,17 +54,17 @@ def test_csv_na_values_pipeline() -> None: assert pipeline.graph.isomorphic(expected_graph) assert str(pipeline.graph.identifier) == config["graph_identifier"] - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) assert len(pipeline.general_metadata) == 0 - assert len(pipeline.time_series_metadata) == 7 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 7 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 7 - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 7 + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) + for name, column in pipeline.dataframe.items(): assert len(column) == 31 expected_graph = Graph() diff --git a/tests/abox/csv_pipeline_test/test_parser.py b/tests/abox/csv_pipeline_test/test_parser.py index f8251e00..55f966a3 100644 --- a/tests/abox/csv_pipeline_test/test_parser.py +++ b/tests/abox/csv_pipeline_test/test_parser.py @@ -17,7 +17,7 @@ parser_args = { "metadata_sep": "\t", - "time_series_sep": "\t", + "dataframe_sep": "\t", "metadata_length": 20, } @@ -244,13 +244,13 @@ def test_csv_parser_bad_mapping() -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == 
sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 5734 assert parser.graph.isomorphic(expected_graph) @@ -273,7 +273,7 @@ def test_csv_parser_no_match_in_mapping() -> None: mapping=os.path.join(mapping_folder, "tensile_test_mapping.json"), parser_args={ "metadata_sep": "\t", - "time_series_sep": "\t", + "dataframe_sep": "\t", "metadata_length": 21, }, ) @@ -292,13 +292,13 @@ def test_csv_parser_no_match_in_mapping() -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 5734 assert parser.graph.isomorphic(expected_graph) @@ -321,7 +321,7 @@ def test_csv_parser_config(config) -> None: assert parser.graph.isomorphic(expected_graph) assert str(parser.graph.identifier) == config["graph_identifier"] - assert sorted(list(parser.time_series.columns)) == sorted(columns) + assert sorted(list(parser.dataframe.columns)) == sorted(columns) @pytest.mark.parametrize("extension", ["xlsx", "json", "csv", dict]) @@ -351,13 +351,13 @@ def test_parser_csv(extension) -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - 
assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 5734 expected_graph = Graph() @@ -394,13 +394,13 @@ def test_parser_csv_input(input_kind) -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 5734 expected_graph = Graph() diff --git a/tests/abox/csv_pipeline_test/test_pipeline.py b/tests/abox/csv_pipeline_test/test_pipeline.py index 16083dc0..c22a3e7d 100644 --- a/tests/abox/csv_pipeline_test/test_pipeline.py +++ b/tests/abox/csv_pipeline_test/test_pipeline.py @@ -21,7 +21,7 @@ parser_args = { "metadata_sep": "\t", - "time_series_sep": "\t", + "dataframe_sep": "\t", "metadata_length": 20, } metadata = { @@ -263,7 +263,7 @@ def test_csv_pipeline_no_match_in_mapping() -> None: mapping=os.path.join(mapping_folder, "tensile_test_mapping.json"), parser_args={ "metadata_sep": "\t", - "time_series_sep": "\t", + "dataframe_sep": "\t", "metadata_length": 21, }, ) @@ -279,7 +279,7 @@ def test_csv_pipeline_no_match_in_mapping() -> None: expected_graph.parse(expected) assert pipeline.graph.isomorphic(expected_graph) - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) + assert 
sorted(list(pipeline.dataframe.columns)) == sorted(columns) @pytest.mark.parametrize("config", [normal_config, bad_config]) @@ -304,7 +304,7 @@ def test_csv_pipeline_config(config) -> None: assert pipeline.graph.isomorphic(expected_graph) assert str(pipeline.graph.identifier) == config["graph_identifier"] - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) @pytest.mark.parametrize("extension", ["xlsx", "json", "csv", dict]) @@ -340,13 +340,13 @@ def test_csv_pipeline(extension) -> None: for row in pipeline.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(pipeline.time_series_metadata) == 6 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 6 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 6 - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 6 + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) + for name, column in pipeline.dataframe.items(): assert len(column) == 5734 expected_graph = Graph() @@ -389,13 +389,13 @@ def test_csv_pipeline_inputs(input_kind) -> None: for row in pipeline.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(pipeline.time_series_metadata) == 6 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 6 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 6 - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 6 + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) + for name, 
column in pipeline.dataframe.items(): assert len(column) == 5734 expected_graph = Graph() diff --git a/tests/abox/csv_without_header/test_parser.py b/tests/abox/csv_without_header/test_parser.py index d998b5c1..eee39f8e 100644 --- a/tests/abox/csv_without_header/test_parser.py +++ b/tests/abox/csv_without_header/test_parser.py @@ -13,9 +13,9 @@ expected = os.path.join(output_folder, "output_csv_parser.ttl") parser_args = { - "time_series_sep": ",", + "dataframe_sep": ",", "metadata_length": 0, - "time_series_header_length": 1, + "dataframe_header_length": 1, } columns = ["TestTime", "Sensor1", "Sensor2", "Sensor3"] @@ -41,14 +41,14 @@ def test_csv_wo_header_parser_config() -> None: assert len(parser.general_metadata) == 0 - assert len(parser.time_series_metadata) == 4 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 4 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 4 - assert sorted(list(parser.time_series.columns)) == sorted(columns) + assert len(parser.dataframe.columns) == 4 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + for name, column in parser.dataframe.items(): assert len(column) == 4 assert parser.graph.isomorphic(expected_graph) diff --git a/tests/abox/csv_without_header/test_pipeline.py b/tests/abox/csv_without_header/test_pipeline.py index c6750fc0..8b5b0aad 100644 --- a/tests/abox/csv_without_header/test_pipeline.py +++ b/tests/abox/csv_without_header/test_pipeline.py @@ -14,9 +14,9 @@ parser_args = { - "time_series_sep": ",", + "dataframe_sep": ",", "metadata_length": 0, - "time_series_header_length": 1, + "dataframe_header_length": 1, } columns = ["TestTime", "Sensor1", "Sensor2", "Sensor3"] @@ -45,17 +45,17 @@ def test_csv_wo_header_pipeline() -> None: assert pipeline.graph.isomorphic(expected_graph) assert str(pipeline.graph.identifier) == config["graph_identifier"] - 
assert sorted(list(pipeline.time_series.columns)) == sorted(columns) + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) assert len(pipeline.general_metadata) == 0 - assert len(pipeline.time_series_metadata) == 4 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 4 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 4 - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 4 + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) + for name, column in pipeline.dataframe.items(): assert len(column) == 4 expected_graph = Graph() diff --git a/tests/abox/json_pipeline_test/test_parser.py b/tests/abox/json_pipeline_test/test_parser.py index 95319a52..0e2284b9 100644 --- a/tests/abox/json_pipeline_test/test_parser.py +++ b/tests/abox/json_pipeline_test/test_parser.py @@ -77,13 +77,13 @@ def test_parser_json(mapping_format, data_format) -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 2 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 2 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 2 - assert sorted(series) == sorted(parser.time_series) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 2 + assert sorted(series) == sorted(parser.dataframe) + for name, column in parser.dataframe.items(): assert len(column) == 3 expected_graph = Graph() @@ -119,13 +119,13 @@ def test_json_parser_different_mapping_files(extension) -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert 
len(parser.time_series_metadata) == 2 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 2 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 2 - assert sorted(series) == sorted(parser.time_series) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 2 + assert sorted(series) == sorted(parser.dataframe) + for name, column in parser.dataframe.items(): assert len(column) == 3 expected_graph = Graph() diff --git a/tests/abox/json_pipeline_test/test_pipeline.py b/tests/abox/json_pipeline_test/test_pipeline.py index f9c6a263..16a8db05 100644 --- a/tests/abox/json_pipeline_test/test_pipeline.py +++ b/tests/abox/json_pipeline_test/test_pipeline.py @@ -82,13 +82,13 @@ def test_pipeline_json(mapping_format, data_format) -> None: for row in pipeline.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(pipeline.time_series_metadata) == 2 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 2 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 2 - assert sorted(series) == sorted(pipeline.time_series) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 2 + assert sorted(series) == sorted(pipeline.dataframe) + for name, column in pipeline.dataframe.items(): assert len(column) == 3 expected_graph = Graph() @@ -132,13 +132,13 @@ def test_json_pipeline_different_mapping_types(extension) -> None: for row in pipeline.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(pipeline.time_series_metadata) == 2 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 2 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert 
len(pipeline.time_series.columns) == 2 - assert sorted(series) == sorted(pipeline.time_series) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 2 + assert sorted(series) == sorted(pipeline.dataframe) + for name, column in pipeline.dataframe.items(): assert len(column) == 3 expected_graph = Graph() diff --git a/tests/abox/xls_pipeline_test/input/mapping/bad_metadata_tensile_test_mapping.json b/tests/abox/xls_pipeline_test/input/mapping/bad_metadata_tensile_test_mapping.json index a73814ca..a6290a08 100644 --- a/tests/abox/xls_pipeline_test/input/mapping/bad_metadata_tensile_test_mapping.json +++ b/tests/abox/xls_pipeline_test/input/mapping/bad_metadata_tensile_test_mapping.json @@ -6,9 +6,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "E15", "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", "key": "Breiten\u00e4nderung", - "time_series_start": "E15", "unit_location": "E14", "worksheet": "Messdaten" }, @@ -19,9 +19,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "Q15", "iri": "https://w3id.org/steel/ProcessOntology/PercentageElongation", "key": "Dehnung", - "time_series_start": "Q15", "unit": "\u00f7", "worksheet": "Messdaten" }, @@ -89,23 +89,23 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "C15", "iri": "https://w3id.org/steel/ProcessOntology/StandardForce", "key": "Standardkraft", - "time_series_start": "C15", "unit_location": "C14", "worksheet": "Messdaten" }, { + "dataframe_start": "D15", "iri": "https://w3id.org/steel/ProcessOntology/Extension", "key": "Standardweg", - "time_series_start": "D15", "unit_location": "D14", "worksheet": "Messdaten" }, { + "dataframe_start": "B15", "iri": "https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel", "key": "Traversenweg absolut", - "time_series_start": "B15", "unit_location": "B14", "worksheet": "Messdaten" }, @@ -117,9 +117,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "A15", "iri": 
"https://w3id.org/steel/ProcessOntology/TestTime", "key": "Zeit", - "time_series_start": "A15", "unit_location": "A14", "worksheet": "Messdaten" } diff --git a/tests/abox/xls_pipeline_test/input/mapping/bad_timeseries_tensile_test_mapping.json b/tests/abox/xls_pipeline_test/input/mapping/bad_timeseries_tensile_test_mapping.json index 05262e9b..c4310915 100644 --- a/tests/abox/xls_pipeline_test/input/mapping/bad_timeseries_tensile_test_mapping.json +++ b/tests/abox/xls_pipeline_test/input/mapping/bad_timeseries_tensile_test_mapping.json @@ -6,9 +6,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "E15", "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", "key": "Breiten\u00e4nderung", - "time_series_start": "E15", "unit_location": "E14", "worksheet": "Messdaten" }, @@ -19,9 +19,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "Q15", "iri": "https://w3id.org/steel/ProcessOntology/PercentageElongation", "key": "Dehnung", - "time_series_start": "Q15", "unit": "\u00f7", "worksheet": "Messdaten" }, @@ -89,23 +89,23 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "C15", "iri": "https://w3id.org/steel/ProcessOntology/StandardForce", "key": "Standardkraft", - "time_series_start": "C15", "unit_location": "C14", "worksheet": "Messdaten" }, { + "dataframe_start": "D5000", "iri": "https://w3id.org/steel/ProcessOntology/Extension", "key": "Standardweg", - "time_series_start": "D5000", "unit_location": "D4000", "worksheet": "Messdaten" }, { + "dataframe_start": "B15", "iri": "https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel", "key": "Traversenweg absolut", - "time_series_start": "B15", "unit_location": "B14", "worksheet": "Messdaten" }, @@ -117,9 +117,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "A15", "iri": "https://w3id.org/steel/ProcessOntology/TestTime", "key": "Zeit", - "time_series_start": "A15", "unit_location": "A14", "worksheet": "Messdaten" } diff --git a/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json 
b/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json index fc166a8d..0bac70ad 100644 --- a/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json +++ b/tests/abox/xls_pipeline_test/input/mapping/mapping_suffix.json @@ -6,9 +6,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "E15", "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", "key": "Breiten\u00e4nderung", - "time_series_start": "E15", "unit_location": "E14", "worksheet": "Messdaten" }, @@ -20,9 +20,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "Q15", "iri": "https://w3id.org/steel/ProcessOntology/PercentageElongation", "key": "Dehnung", - "time_series_start": "Q15", "unit": "\u00f7", "worksheet": "Messdaten" }, @@ -90,23 +90,23 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "C15", "iri": "https://w3id.org/steel/ProcessOntology/StandardForce", "key": "Standardkraft", - "time_series_start": "C15", "unit_location": "C14", "worksheet": "Messdaten" }, { + "dataframe_start": "D15", "iri": "https://w3id.org/steel/ProcessOntology/Extension", "key": "Standardweg", - "time_series_start": "D15", "unit_location": "D14", "worksheet": "Messdaten" }, { + "dataframe_start": "B15", "iri": "https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel", "key": "Traversenweg absolut", - "time_series_start": "B15", "unit_location": "B14", "worksheet": "Messdaten" }, @@ -118,10 +118,10 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "A15", "iri": "https://w3id.org/steel/ProcessOntology/TestTime", "key": "Zeit", "suffix": "Time1", - "time_series_start": "A15", "unit_location": "A14", "worksheet": "Messdaten" } diff --git a/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.csv b/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.csv index 3993641d..5dee1e0b 100644 --- a/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.csv +++ b/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.csv @@ -1,4 +1,4 @@ 
-key;iri;value_location;unit_location;worksheet;time_series_start;unit;annotation +key;iri;value_location;unit_location;worksheet;dataframe_start;unit;annotation Projekt;https://w3id.org/steel/ProcessOntology/ProjectNumber;F6;;Protokoll;;; Datum;https://w3id.org/steel/ProcessOntology/TimeStamp;AD6;;Protokoll;;; Prüfmaschine;https://w3id.org/steel/ProcessOntology/MachineData;I8;;Protokoll;;; diff --git a/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.json b/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.json index 58e2fbf2..4ca002bc 100644 --- a/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.json +++ b/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.json @@ -6,9 +6,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "E15", "iri": "https://w3id.org/steel/ProcessOntology/WidthChange", "key": "Breiten\u00e4nderung", - "time_series_start": "E15", "unit_location": "E14", "worksheet": "Messdaten" }, @@ -19,9 +19,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "Q15", "iri": "https://w3id.org/steel/ProcessOntology/PercentageElongation", "key": "Dehnung", - "time_series_start": "Q15", "unit": "\u00f7", "worksheet": "Messdaten" }, @@ -89,23 +89,23 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "C15", "iri": "https://w3id.org/steel/ProcessOntology/StandardForce", "key": "Standardkraft", - "time_series_start": "C15", "unit_location": "C14", "worksheet": "Messdaten" }, { + "dataframe_start": "D15", "iri": "https://w3id.org/steel/ProcessOntology/Extension", "key": "Standardweg", - "time_series_start": "D15", "unit_location": "D14", "worksheet": "Messdaten" }, { + "dataframe_start": "B15", "iri": "https://w3id.org/steel/ProcessOntology/AbsoluteCrossheadTravel", "key": "Traversenweg absolut", - "time_series_start": "B15", "unit_location": "B14", "worksheet": "Messdaten" }, @@ -117,9 +117,9 @@ "worksheet": "Protokoll" }, { + "dataframe_start": "A15", "iri": 
"https://w3id.org/steel/ProcessOntology/TestTime", "key": "Zeit", - "time_series_start": "A15", "unit_location": "A14", "worksheet": "Messdaten" } diff --git a/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.xlsx b/tests/abox/xls_pipeline_test/input/mapping/tensile_test_mapping.xlsx index 3a0e2fe5029ed67662e8a5870c1bf8c799a2e4a8..29d78c6e44f9678684c730354c86be29fc93492a 100644 GIT binary patch literal 9914 zcmeHt1y>!(*7m{O-5o-34o+~_;O_1RCkG1-K?1?uU4n#Q!JPoX1HmP@yNCB=X6`pL znfZRfz1^$3SFh@4cX!pZrS>ir1sGUd06YK@002+{%#N~bjGzF3I9LDx2Y?8zF9C9L zw{&th)&M(Ox*4**a&(}~hlPHY3xI|^|G(|OxCbf|hn0KSF{Q3$A0)oB$S>B3B61%F z4PZZ474Ga!=r1?d&$hLF%6@Z?DV~RC$6Jd%w&=roI%-qr&eH7anhQ`3lq zm#uH$fUbjpDBe|1_c#}qM3jTb*f_>21CZ*{*rUcJxh5uGNo<3MFC4IuH9w$*wbI{= zTCP{hKe*7fs;sq)#rL^v3UgxuXWGt|(Z}d~gGCNDirQxp-_xi_g-S=A(M+w`^)5xB z-R)TuaK^+OdYL^et_^+Z%L@Fbc3yHNF5Tn)3%d0Pyq#15o*!T7J}Ir@4S|O%bAXC=j(YcC~bHV`Ke!{+~Mj7wh0( ze!VhY>17W)YS^*tL-^qJ%u*btxPq6M>?bM>{{Xo~jE^w|v?NO%45XME#KAB!uiN|| zJ})f2h}j#Yx?1O|jKanhq-pS~3QM_j@j!S^@0u*_Qn}WPQu58bBtA+7zw9{Gz?!TEl{stNq5=o<~`JmxcXsLSWP2u?q0%J#_QSS z;$1Y62!6$b$uzt{H#6&R?|lays4j1bHPvnSZEMW3T?MGY#ug5p=aLy6IFG(8%9(>| z)Lhs%oMSQrbh($W_3F7!hqJ)FyhuG2gC|2lQ52=i5D)cFB_Vzvy!i&AAg6Eu00yLI zUOBLNfn4p)Kp^{{eydQ!5R}1=>z7miWOADoBtgmeyzHz@YQES;e<4LS$vo2p&cP~! 
zUUTND9VMjD(WnK#i~CCH?O5~mYQ?@^b{@j}7!}U`@7+BS3dPt|yd3SfK({+WA_Yrv zARRcGqPc4}xZ3!=r#who-VqBRi?cC3Pwi^hqn54imWRwr^+8LZ1i#pOBVF7rJvGrS z!`f-!T%5N=d>x)@^0-T7|D%a-zD}B(EeYl*&@QlDPRdlTKC=yhwZvN=A6*>`g~{{| z>0T$;Eb)U)`#bq5P58QNT2UV~FGP$}O85CJN{JL&szW%@PiVWq$6hxiux8LB z7;QOx+JpNwrY1&grTdMT)*2Smi$JX7DI3^IO&_|Tbw5lNKG*%#t{FXxhlX0#e5uof zEzJ_nXimEWF05n*j-xJ(!y8+wPT7>cQd}YnL7|SRfumKM?a(l!3;$Hz_jDh9D+}Q& z$Vy*-)#AS5(=;)=h#@p}vbf-@5lRJC>lnjIUT?=P3@+(>I%)d=zeeKCb^LC` zqR6|vW3-aT&gM+i0p&)KEdCVWy5LeLMXW3vVeG>iFk-^A7njA+d&d%vY=f2Si1{-3 zXaV&!UyYW5_~r7NYx${?joVn#>fUq0ne_E3)7a0 zDKF|;M#AurWtT$|f(%?foJpiEqt)X`{b*yCbgoP*XMW9BeM5d?<-$~f213-jpXNmi zottLtJ<_`RR6mAiGZZ?R-}6cX8^%kC%PG7)>V&Ego*I2Pv48);&#szvR zp#Q}R$@3ONov#XPFrYB-@#ui(H8=V?49;a2Q)whNtT&Fsu^=31^n4Ts(d@8_JV*u( z2m4@ezv2)DJ>3nbSvY=>IUAR6>*g69QFuWywd6(fWEhv-7j_R$JGXJN&k##DT>m}b zE=2vHDyY7(kusgpYV)aCV&MutCkRY2aZZFJ_W}(JnAn-7V!G*)x zxI^6}txzB@=N9q}bEga+l%aUd81J^3pY}=fc+Fk>{xf68!Wr}M#j(}*p@Eqd$VmT_ zGR+$_47(5`_do^!5JH~#!_c|eSX#QfvHf=8_-XMf6BT0T*)a!>lkUiUf}8y4x!c{`B}xop~*dQha$=ScxM73Xzd(d5|KE2@Gw0F>TDp1?ostdr>uz(NwNez&z;% z5{ZcryxS#V{XKa?$r{(thq(%0AO?*OxfSP0ybIsZq^@wA#7YMEwo^jzWOBFTZiq45EmQz^0~MB7`RlrS z;)wiW0;TfQ;=_em`crQ&tGIc)W1vMiiV0EO*BP z+x|EE{wXZktQflOMq8E*e2+V9Y%7bm`zf`2&K-&_FP2{aSjxVix&@b3rDSjl9=zPh ze7TX!#Pd)^Fo3>q{Fqxs*>E3Oii27mXECZt>p?_YQeJduzi}5x z8xkxTFO?C+5tU!f@1LLZ=_lCCkB6`nHt3B!C1DwCP+BCGnu9of5(~b8N(3%&UnAa- zA<$}vzb|(eH~76e>H&y>aB>uA#8=)_gRQ_YdH)ouKoi9Ms!9trwF*lS%6{QF{8a-E zssgXymaaf0pNO@I*>3!%YY$R0Aa*7aDuFvP@X%3c17wvS6`CKQ>}H_zCR9Fo0!?NU zx>B17zH;^%Jxld=NEl}rO+gu7K_s*=SbErn~!FlHZj&# zyS)~3%gRdhyS=1y&I%=;Ait8r{EY;C4zFz=8@27Qdwi*UsHP&zL_S_Q6Mez{G4oqZ zBWH&k{#|v#dWe>)_=c~Ms(8Owmu^1OPC;Qp2U(#bdcw21;J z1}1pH$GJUIE5{xcw{TAsziHHOr#ePi#g~^#%<^L+Oj?6-N=&tyoZ~=kwP&9d#{y~< z&QLn@i{+8!Zt3W5o7s(|?abuuup+E03*(Y=jG;Gn7QXGIY!g@#SQi#2&;1Mq{^=e% zFs*bTn##+z)Uym5IT~V6v#39Oom!r#5n6Rk9BM4kC(64!U8aIw_7#E8DI?!bn6gR)9i z%hWBb&o1y5AdGM>`qC0PX%d`uGWgi$cyB5zGvfsMW%OV&QVnkLx0V@5?h(4cAUF7j 
zQW^46L=I-WRvT%&a{?Ae1HlBpw&k_%M80Ees{*v0m1i~Y>kkYboRk!{;Xrc3$J0l^FHtyfOe&tCKYK7AE^+PwdcWhXwAYTq(_V++vX`9rkx_t#BPc(|LH9nua?1greFH6sL^A5Bq43QR>jGaLo)!VM91-^Zh$V;eK zSti>thLM|#@xmxWI^*Kp1=lF3SR4h{d$*0%*&7=Znykr!<_! z=o@wK7EnpAxm(Jb!CJ_7Nvg-U6qwj?N90A>MqW0|P2VL#k5s3-xk}^74}IQpEz~Ok zYlit~fvNLTqhAcC*O_TA@%b;_VPdAjr5GRK!gd9G_}d_e#rePpOu87Vsc>O zk>y_WR!NbgG!>{rX(2(N0ZED(_r(H3{3Z2b2fGw#O|@Nt^7)p+^aAQ5kA%$AEo$4d zWHw>8F*Od0UEOL&fnx{L4WYv5!wL=>-lE+$I^0|_0{l@G6Z)QAO0B|^4-V%-2Glj* zf?>5ORFaNLngZI}?D_{KlM8X)AhVhpaSdDL(lOX%U?qv~I~9+%WJ@?~#pxWL1R5H- zY>zS$0sZK$inB=T^N}(yYm%5=AJvp^+U3oUplC;yxtqKPGn%-R^VyhT)(hv3Yo?xX zXRyzkNW<&(c^+izh~}JW>q;o_fnv%z26~z3(Q#_Rva!b;Ah%hJ#P4^ z2xxTHTi6;J>$2=^_A-XIp5QhdJi+l#KP{ww z;8N7-=`6jn34#sPD^wF3WVN8%@T;cMV%blo8-KK zV96qZ6OC|D?&&;d7;%rGCFQn+hmR+9yF7DOzqt05yxhn8myY~aQt!7CHA=9#f{Knu zm@})#iFs4mUJPFX3Fe{SN5%a>D5t>V_>jZx&VP6+BwK~pt~U1tIZ)lWP6=W1cEV~v z_(IWh;x3-vG9#%jyzK;iFGg?nyv*Y{rew#InYWmicNAFoV{*$y{a(Wds3LLq>}aVa zcqMoobLjKU;YYr{`|GYk{SzXS-o*OZCyZ5M%wqU^DyaO$^a+Qho8Q96U$d8DcKFz= zkd{73jREa0dql)O99kZz7Q{*|}S%TbS@(t!PQ!&!dy} z>n#H3mt=#Pr#5rcz{8`ZqFrI;R|mIS8Mju>lNedJJE3XG_DO3yti2IsYT5>5oDcVI zSe5K{ESh+DdbOxIx=|wEMV=BHu9)Yh-f=_$#6J=V>n%7S+78ns)VEEcsn@96)0Bl4 z@y9iAL~B*K#Rv78Hqpkrh6;5{C(j1R+O=n<=T6tPwT=k;GdHSZ1aGq60skgHWBheTFE48g~{=CWrBL1IibbA(iug8k+7yaqdmdX|iOHnNt! 
z?7&ISgiw{!>XVT^ieXX-UI{`~sWaUfWwns$q)92KLo7RP0X@i03gVUelgNc~vcDvu zi~4@|D++&k_DC9TAyyZUboxF1p`d+w?4yM?jYoWFeNH)Q5APuN_zkidT6MdX|C$}2 z%=sK}u1~3^w1Vwsg&Gg4<61V*??SrXw)fj;W(Ix7{FNiHN(9+oCH|HMj4ZL0vQvuo zg#6w~Kw-|>zqI8pS>Q2hvLaWd=gH`X0&tZdfC{WKX7rmtZB83IQGeZ>YNz$-@eRuG zjGzFF-HcKePRm;Tk^Rc`G#xsZN}ruK_SJ1&+S=};w$^EafwA&qzj6H`73Yh#*Parj zEQY9Gzm2hskiM80IzfH9Y@CJ>?rfe zW#dAd_8A@vPgQ`=5b6**qp%WB=~n&8#r+t;c($AI!0?GVcBq&Og>;vhoOYKTI0se= zyYJJc*?lVRP_&Qh#wc$5voo(unj8n_6h&ulJMQOPjX|T zgMT*$$s64w0|2;xD3*<>tEGjyyQ{5}^&k2*s=OeI*{;1OsM8*dqynvpow$-*j*I*z z&RdMc$APWEYiWst>Uhd)r-;|6azj#M=IuLbw~I=ztqf3mTGsNd`a7RklgjDxW3MX= zUkr^>nzH!zj?}R$Wxug}m3M<5gW5ab_j53oJN9*>$d@!5j!Pmo2PkVykwa_lTVSOa zYI#CM@3`bl6kw?97yK5=r6aAST4VQ{=nCBghn z*37%;1_Me_^6?ca9iJkVZ2{fc6%5Sv#&Vq?7rzl@1C+wK16GUlb>A7tu!EiJ@-Jo)* z73@_qg~i!tdmj+U!GNPzzdSPlJ zt5uGPqrt}x@sjV-s_CSkIn=eoGk8>z;c0^!eG7JuET$!g5Z(r7*iq4oCYaE@Nu#9^ zWto^RFAU$7rrZx;vu<5`#wxBFl$SP8-|9xw%4b4F-~b06L0?fksYa_@kS#F596a z`vV!0wYD|&xq1#A2}vAnVK689d3~s*NoRLFyyst*?c(6&8}%-BEAa&&A+XO!LrVdH z&=S;+-$F7}*_GkS7BAU1`HD#gU?=ZB+7SI9mL)=#f>xjBOX%0!7sR-GScs9S!>`4H zTb%k~_ai}8v|@ji?OzoDS2>|c2Qmw?g(v|QBzbB9GFNd0IlHl$gIq2DC|3V3XA0>$ z-+0xR!R)x9D@qR-g=2MUbHzNJ@wQjrv2i$vSL}QSi1XqaK3}c)p2zB%NQ^|sHeN5X zZD(>B9enP-2RK?3VN23Oor#{x%O)}GaOmG^4+4`JyCUVqg04))sf9$Mz)IkvH6v|q z8>X&TZwpa3J%Sp(e#JHKX;k3^x*%^RXUM0Fw5V=3Sv|yT2J}#-ZeTOxjBqYOKYCN< z!>FA=>2~br!yvxR3&2j~PE|zwF`+kJbbFq=5S>_5e?M<2Fh}+Q?l>jb+xk1DL%P+Z zWER?X&g%B%k_5p_O8H?VjJ$al<7}rx(G+1Ug0?|dwBe_>yx6_2IjkMR3+0j8RrTNg>a^c9K;pVH|#T9KjKLM~2+=G*y!(v?|(BdyEV zEKi!-sSe?H$*oI`Jp1hQ8-{6P)>7GdQ|&xX`dcq&wku-6$%qDMUj>=ZVTvEL+Wcs= zClI17*exB_)Y6V4y2T?3z2?NzYY%j6eH{Y%V)WNC`fXaT?~0^k)?1!A!5$$Q#$0Oh z5p5N00+#K5mFRLB$*yll@qm@+CzGJ7sx zY*gU?CJeo8fGf!!4L`*<*RGR3aor#e1M!hv+r&(*u0Se0-FGDZc4(vZ%Lg>t1CHU= zV4F^?jVniU&R!nc$URo4ah3VGm5vB@`7Xl(&X6gL;@aEXEeRNel$f?I*`97XYs*2i zyX8vCNs*du^@Bju5ONpndc2JiPk>-48x}Mxc>TDpT6wIH z5BV&^MPyRF^g#%dyu@EOai2`G60BNMm5F>7h(PWLs#^?PF!MVAvs^q4p5KU0KUitC 
zvb}mtNXOkS$^8Ktuix`aP|z%p2;<*Z3H@hO|8xD9HA5;2e;4rg^$h<3{=B9@ROc_N z8-4};TI2l_+6u{d{ZjG$75w*7+n-PXz!m8?@c*aq_N$y<>o0#wDnj{xAMuZB%&$^@ zP1F4;r5Ed8lXkxf_;p(KrvObzRS41-e@?P~h5i~i{t0~#NgDoy{u)O9D&g;*_fI?k sP)!a1{KF6b3je#M{3~3T>M!s=?52tW9AtC>0A$D>0CDxSv_J3u59v8ZhX4Qo literal 7572 zcmaJ`2RK|?*B*=>qKw|6cZMKZ^d7wj6W!>&i|9n}y%Rx1i5f%=qJ@awdkBI=@BfIK z>)z!4-+rDm=gf1~T6^tx@3Y>u*H(Un07L^^w+-hQ?Vk^S{UE{a`i`d7PVCBm$)Nm{ zv21zQ^#%4$4juqN|67LbTCy~*O|Aos6|7I&?=3KkCt*_Mg>7g@bpY38GR10hpOUEN z>`>~iks$$|V6aK^)!BRNqdl%PEyem|14GED0Wa@&?{yYI?xPYC@MA`IV%mxzT@C7$Y3E8>hs>Zb&StFLOuU z=Elld{@(CFdJ1SugF883Z{2cy2C1&hVkhbrb^TFkL0vPztJBU?%hr z2>yb*$ZI_E_OznL)zpcyCALq$3j-SHzvf<3pTh?xv-)ua>wO zjJWu;#h|FcT&wGdChe0+cyn;>g_^C860BaAWeBU3jOj$|IB@`d71=boL@krzP!Hk8 zKYZ6#&R}fa1d`n6>q&m}LOxma>BUmWID1wu>>?NTe0ILRw)`PFn&JY8xQ?Fos%+08 z43~MiWJl;~Gd*N|5IW!N`NK4HzFpS5^XhQ9u;ucHBqBp;YqI{si;I)tWPP8*4@XK& z_NGD6=JGTRl2MShm&iuykWO0BC6=QWem62zLyekB6*2uCG<0a#3h~@LG za=C4`0Kugg8j0Lz5(tvjQ9FY$p=AUZ4Xox42`ps#naTm#oC%Pt44azD_h} zxNhf>L#yMgyqsMP3XC=3AN1ssxg#jyMJdDvP9k{VmUMIBeCQ{)FTOWSX-J9zPgT%X zC5+pg2qh8U;TV3V+rM>aYn;38=Qm?mH4!$mf_#DUq1dk}l&{W6I1af&f@5m6mn)fW zDC{63Fa|jzL*9+Yp}Dg8QBRm2JAOAAv6PxhrHQfE!+{APuJ|!VyX4S3b0+`G9t)dv zmG$Wzr|CIgoAAus$JwnH*}D65S#*uSv9l1&i(K)RXtGlY)+?xYy>(}q$Jm(BQX=vW zs;Ra9N5XnW7u@6pYcMd6-27!HpK#;?G`Gwj9dWEBc3}heRo(?kVb04k?!ZbcGPwjt z)H-x%fwi|}!9vwbkJG$j1j;v4i=v9QV_%%dvGS#Jx$wB}!$CF1-u-ax-Qe3*x4~ zSh^b}qhoSk6zwQ>JBxfnRCx~Tu>+baZ8gcta`mSCLe6RqHF(;}ePA+7Q%QpNUeNoj z(fKJkA7)}z%2B!`m4Sl7FLsbPrph^`e%bTut5zOVL-}8`oPif0Vn|^k_f?}_c$ovoP=0}M;qkH1`dr}jU;<-mi?k;|YvA(S4l!mq z>Gk*}(l0i-3GbcWMXVHX001wHT(SSjCdPJ-rq{u}AQocP4h98Z$@O@9O)+tb>l%_g zQ6TJp^QJL|)Jl6#G_Le?hoEF^@!hnkd$!=(ibBmqG24mMTEP%EV`Tp=YHzlP%)|%;4ki648eV(`nY?6`?{KAoRKnuWHUsf&obicdfqR4%BFF$7g`7@gikgwd^J(*6HHI8ibg9VB`D@sI88oa zv0yshoBnwB7n*-^D&`GYy4g8eIhmW9I^PoX4f2&mAw~N@002S?01*Bm`%~p7yOyTv z+AYao1zm-$9<^2PNVJ;c(7Tgs5`Ub_inM4AQ=pz+`LR=H!-i1Ely=C(?i;T5VU_~2 z@?5^9S)<^9YV)OXV=AkZ@pN3HUTN5@nc3CF_;B(>Z5q#XX{GA7mHubRkrysP-Wmr^ 
zRf&}~&R1orlhd`c^L&c&cBYBk)HTTh7Xvz@8e$ylmdE^yYc*DA<(r z?ljxEz8Xj>m^(<{tvcVD)yFz})onFe9}N~5`~;hE#A);$^k33#tJWp&hxwqLOzAb%HVi_GN1WWg zdym#Xwy8Z=OX9REXvS}uDfi0u_c+eBa4!|h`u z;21E#sAW1VtfMhRM*Wf9Yco6junA#(a?I+DW!>qhI~TcC`T3{nSEonXBkcv&Q-ygC zS-%;rX1&(yVse&tSVx27*va)=&sg`$WYee2#=a6mXq-oI zGB~1?l!Em=;<>xCslCKD5o$&(vJ`v>&G#78;Q;vxs9(?sNms`Mqov`*Hos;l zjeQ;kp)64TsY(0(J=|;cgAfkXh!mi+DKCEn5E>$PtR$DGc^121)Y@tFo+U&N<7725FXuvD?-+~ zgXU#pn}rb#uY&s0B3Ea3;2{>>L{+O8@m*t14kOOoM!&?_`#6NC~d2yAor>%{6(fl;uJH+{v0aE_= zhb0$d*5#Rs2??`;9(&?*`5+PA;lrR4_iN)>Y}hR)Dnx}D@~djP=cWYf;BCKAb&dtk zb1VXwaRR!Tz|FLKY*OO(Dhj+(dNlgsesH=ewAP{lw$Fz`hYehD2%q7Wg`W#aaDxbz zVGiRob+&G?_P5vN5b@HWh7H94gX(z~8>p^_NU)4c%@)_&*{aRf?^vH>$V;QYrH&iR z?s}&ib(|K9U{qNzB0Bu7j35ERUcFV)AS5k>T3$H=2{$1em5(w}x+|o0aXUzO(2@Au zZ;bY2rv9|eE8Av;;&BtBYotc6%bX88rFn*dw1^_T_BBhhFhqbBGH9vJ~%qvZUu`ab(S_ldu zeddXv(gYb9>olN@mkTa^&2DtJgLYr_l{ZwtLRGXzQ7{78dfxkJ)@?kTOn>Kv2z&pC zUI{Roohl-w*LgrAb)oQsvvZJ&O>uAWr!ZRgLCdF25mt=^TZVlpk|H@9K)cdX{J@dP zBv~E(^)SX=(3FhjqPd)^R`Ad}S5eg%Sr7zAw!YoZ@C}KQ#bL1YkN$NXf2 z?y`^2Lt?6*KtAhedn(JS?5O-Tg60KpW_Ln5u+U=V>XK6T(N;3?x!!ZprnZWR+<+va z#CEuvvk%^gL5zgNBC&9-u_vqSxdpjr-%VVZrr2}L^A#|c1x7&fecBc6VyMB%x^+4b z?6qoqIt!w;j3gbE#jq`^;Mnd#cmHqrX8R^2p#img0};F0>}&l15!Z&n2$Y2*E9(cJ z8X{1?FC0Gug|$TVe3jsI z)I27n*3mF(`GN1Vz0RL}#uSiGe-y>;pDb5dhFmR>SITpamp4Nn>*q(r9y2L;cS#w37Cu2JRN^oJ&sQkM8WuBJ(yo^GyuQjgOTVaR|1x*lK03ZB9FLHiT(v z<$tB;A^8X`IWXsGdR#jGRWMEBLCY-HS$Z@6E_te^N&0?n9j&`PiUsoEEHb&km3;%I z|L*R5vgoOs#|iSk7r6!mKT#T%h(*Ctg6n;@uDG48v#G7KzPg9KsT1USsn@9YDZU3| zohNo{CA?kr#j>12h^AK0d+Ph*K;bNB4T4rDY8xe_!_H7$qm{szS@s z6C@*DNV)XYQ?Az_>Ss2!Ep|YdN4Q*Fp1|WbV^A~WiYF^h%VZ;xoJ7p3a$MKx_B~1D zRimgey~^$vhWj_}{?7Jv0WjPtU_}xv818?UQw;6xuR+gL9kQJU-#NzBsP;7IWASseBEIKS3N0>&Q1d0&_yyJq5feI@Y_%!nE(PB-2R!^E9jc=SexMbd9B zu=iA)k^C^bOeoBs@F^mjJ4+|3?2LJh0;Q2rxVj0P(q`17A@GNq1QTLwKm63iy$Z}? 
z!~q-rZsqZ@1qgVM)iY_lZx#75;&92(@+GM1`0-Auk-N!0o~D)qCqs2Zj zMLA298xpNt;G_6_+4HT2cX(i3xltx#KRPC>NYq5t5kACmAane%=?O-lo*vGs3C(yi z{!tRy=Oa(_(5q#4chCJsocqy0)^;nOgzIUbv z#Vw0?*Ib5@FA2@SExWHhQvOCPqrF<5dT9th;_aHHH#pC=a92ArV?00zD|)Selk|iV z#khRsQSE{LuL=7z$NsBs^{r#eBkD^-E9zg9t5WdBPW4b$AT^kp+o_$Ltrh)Mo)_W{v|SuI_n-pWft z?id;>{#14Wo8BdhrO%o@0G&qf?u+lfFr*6J`+6)E5c@Q+QK+)_&LA;=?D6yN+WaAV zY^sxY!+~7HJ$)laT!)rA?c$H;j88xMa;uHf)zA+WaUlNOso_h)F_S8>t)bdXHYZ?$sBeTh&B2LL?b9g_*{@6Y?7?aG zn7L3e+xTpMH0)bK;dc7lo-Zvq?PDnnmJ?VV7IXuRlewXzsfn7iqlGQ>C#@D}_Q94t z);XT&k%R|#bi3<<))Yt~o{k58ALr9&9V1CrjkvJotFOZQpVNI_GZTrzmBQ2Xt33L) zW$(&ovU6Un+(u2v%zgO&Vzvnkh;X6e_!hD^**RcAs7(*XmZRQ(&pomG^i$OsVnS#w zh1ozNMSgb|5vYB811Cl^X$sUnR47ZWONHTfaZ3a(qt z8lCX+B+E;0^HYZFHD5J>vCEhQ`Cugx0s64YOyLY?tci2TXWoufiQU(3v}x9PEK?}S zh2Od#DwAYBVQUKKv#c@j_ZIq&u(jR|Um=8-1>BK)UJS>pn=JonG2x9hajW!dO({}X z)r!Y>#!#7~$ZIeL=F>W<`vnV}aL$g5ZCl_RzUg^9xT24C@|16_wt8roWgepHk)+Cj zHsXs^kI_;ZB1Wgz(sxj}J!v=xC=JBV#-yOYfa>;QQU@l%5#`5(F_`|H0`)U}FCN9+ z%|O088ju3d*CrRoX@NPzJr7QFW}p46Vnh9uZ}atL4ntea+6hG<6rkxiAZQLmtjEd( z8f;(nI@UP{rDbTOQEb;43(wITe_QRBJjNO=WTC8|d=~s}Xz&C0fzj7T--wpQ(o?>Z zI6X5~G&YmC$bh2}KT-#kD8i9fbIf&7KVn%brSd&9|| zFwW4j%kJNs@SAk8A%{=k0W6{;!It>lo8|q_Z$46`&$j(~Wn1mg=7K<2NX6`U#g=@# zyy@=ZETq$wWbi)o3d#~*o3vR(P^jD3-fF?;tq36oE8aWEj9{+%#9mT9>jyh`i7K#y zV#Ra=NXrNf>Qf$!Wn+$I5hkX$=0*&CjH6kh)ug)fT3q}=&_UvJi3<+Yf&7?UiHJl+ z@?qyVbups1eEsJ>g4QZUr1~~bw9J=2cI4y72H!1kgu~wxvzVFv0oHF^)3N9=6f zt+oMoJSe7$N=nsU5#QFGKy(1^~|f(7i26-Fk2{fZw-hw^nFj4#AMV-LU None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert 
len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 460 @@ -226,12 +226,12 @@ def test_xlsx_parser_no_match_in_timeseries_from_mapping() -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 5 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 5 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 5 - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 5 + for name, column in parser.dataframe.items(): assert len(column) == 460 assert remove_ids(parser.to_dict(schema=dsms_schema)) == sort_entries( @@ -273,7 +273,7 @@ def test_csv_parser_config(config) -> None: metadata ) assert sort_entries(parser.to_dict()) == as_non_dsms_schema(metadata) - assert sorted(list(parser.time_series.columns)) == sorted(columns) + assert sorted(list(parser.dataframe.columns)) == sorted(columns) @pytest.mark.parametrize("extension", ["xlsx", "json", "csv", dict]) @@ -314,13 +314,13 @@ def test_parser_excel(extension) -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 460 expected_graph = Graph() @@ -367,13 +367,13 
@@ def test_parser_excel_inputs(input_kind) -> None: for row in parser.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(parser.time_series_metadata) == 6 - for row in parser.time_series_metadata: + assert len(parser.dataframe_metadata) == 6 + for row in parser.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(parser.time_series.columns) == 6 - assert sorted(list(parser.time_series.columns)) == sorted(columns) - for name, column in parser.time_series.items(): + assert len(parser.dataframe.columns) == 6 + assert sorted(list(parser.dataframe.columns)) == sorted(columns) + for name, column in parser.dataframe.items(): assert len(column) == 460 expected_graph = Graph() diff --git a/tests/abox/xls_pipeline_test/test_pipeline.py b/tests/abox/xls_pipeline_test/test_pipeline.py index 3f665967..4fa94df8 100644 --- a/tests/abox/xls_pipeline_test/test_pipeline.py +++ b/tests/abox/xls_pipeline_test/test_pipeline.py @@ -331,7 +331,7 @@ def test_xlsx_pipeline_config(config) -> None: metadata ) assert sort_entries(pipeline.to_dict()) == as_non_dsms_schema(metadata) - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) @pytest.mark.parametrize("extension", ["xlsx", "json", "csv", dict]) @@ -379,13 +379,13 @@ def test_excel_pipeline(extension) -> None: for row in pipeline.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(pipeline.time_series_metadata) == 6 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 6 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 6 - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 6 + assert 
sorted(list(pipeline.dataframe.columns)) == sorted(columns) + for name, column in pipeline.dataframe.items(): assert len(column) == 460 expected_graph = Graph() @@ -440,13 +440,13 @@ def test_excel_pipeline_inputs(input_kind) -> None: for row in pipeline.general_metadata: assert isinstance(row, QuantityGraph) or isinstance(row, PropertyGraph) - assert len(pipeline.time_series_metadata) == 6 - for row in pipeline.time_series_metadata: + assert len(pipeline.dataframe_metadata) == 6 + for row in pipeline.dataframe_metadata: assert isinstance(row, QuantityGraph) - assert len(pipeline.time_series.columns) == 6 - assert sorted(list(pipeline.time_series.columns)) == sorted(columns) - for name, column in pipeline.time_series.items(): + assert len(pipeline.dataframe.columns) == 6 + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns) + for name, column in pipeline.dataframe.items(): assert len(column) == 460 expected_graph = Graph() @@ -499,4 +499,4 @@ def test_excel_pipeline_suffix() -> None: metadata_suffix ) - assert sorted(list(pipeline.time_series.columns)) == sorted(columns_suffix) + assert sorted(list(pipeline.dataframe.columns)) == sorted(columns_suffix)