-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
42 changed files
with
1,861 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -159,3 +159,4 @@ cython_debug/ | |
.vscode/ | ||
|
||
.DS_Store | ||
.python-version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
"""OpenLineage models & facets. | ||
Currently, openlineage-python [does not support](https://github.com/OpenLineage/OpenLineage/issues/2629) deserialization from JSON. | ||
So we have to write our own deserialization logic. | ||
Also, FastStream supports only ``pydantic`` models, whereas openlineage-python provides ``attrs`` models. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from pydantic import BaseModel, ConfigDict | ||
|
||
|
||
class OpenLineageBase(BaseModel):
    """Common parent of every OpenLineage model.

    The shared configuration ignores unknown input keys, makes instances
    immutable (frozen) and allows arbitrary field types.
    """

    model_config = ConfigDict(
        extra="ignore",
        frozen=True,
        arbitrary_types_allowed=True,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from pydantic import Field | ||
|
||
from data_rentgen.consumer.openlineage.base import OpenLineageBase | ||
from data_rentgen.consumer.openlineage.dataset_facets import ( | ||
OpenLineageDatasetFacetsDict, | ||
OpenLineageInputDatasetFacetsDict, | ||
OpenLineageOutputDatasetFacetsDict, | ||
) | ||
|
||
|
||
class OpenLineageDataset(OpenLineageBase):
    """Dataset shared by input and output dataset models.

    See [Dataset](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """

    # The namespace is tagged with ``format: uri`` in the JSON schema metadata.
    namespace: str = Field(json_schema_extra={"format": "uri"})
    name: str
    # When no facets are supplied, a new empty facets dict is built per instance.
    facets: OpenLineageDatasetFacetsDict = Field(default_factory=OpenLineageDatasetFacetsDict)  # type: ignore[arg-type]
|
||
|
||
class OpenLineageInputDataset(OpenLineageDataset):
    """Dataset read by a run, extended with input-specific facets.

    See [InputDataset](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """

    # When absent from the payload, a new empty input facets dict is created.
    inputFacets: OpenLineageInputDatasetFacetsDict = Field(default_factory=OpenLineageInputDatasetFacetsDict)  # type: ignore[arg-type]
|
||
|
||
class OpenLineageOutputDataset(OpenLineageDataset):
    """Dataset written by a run, extended with output-specific facets.

    See [OutputDataset](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """

    # When absent from the payload, a new empty output facets dict is created.
    outputFacets: OpenLineageOutputDatasetFacetsDict = Field(default_factory=OpenLineageOutputDatasetFacetsDict)  # type: ignore[arg-type]
88 changes: 88 additions & 0 deletions
88
data_rentgen/consumer/openlineage/dataset_facets/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from typing import TypedDict | ||
|
||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.dataquality_metrics import ( | ||
OpenLineageDataQualityMetricsInputDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.datasource import ( | ||
OpenLineageDatasourceDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.documentation import ( | ||
OpenLineageDocumentationDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.lifecycle_change import ( | ||
OpenLineageDatasetLifecycleStateChange, | ||
OpenLineageDatasetPreviousIdentifier, | ||
OpenLineageLifecycleStateChangeDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.output_statistics import ( | ||
OpenLineageOutputStatisticsOutputDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.schema import ( | ||
OpenLineageSchemaDatasetFacet, | ||
OpenLineageSchemaField, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.storage import ( | ||
OpenLineageStorageDatasetFacet, | ||
) | ||
from data_rentgen.consumer.openlineage.dataset_facets.symlinks import ( | ||
OpenLineageSymlinkIdentifier, | ||
OpenLineageSymlinksDatasetFacet, | ||
OpenLineageSymlinkType, | ||
) | ||
|
||
# Public names of this package: the facet models re-exported from the
# submodules, followed by the facet dictionaries declared in this module.
__all__ = [
    "OpenLineageDatasetFacet",
    "OpenLineageDataQualityMetricsInputDatasetFacet",
    "OpenLineageDatasourceDatasetFacet",
    "OpenLineageDocumentationDatasetFacet",
    "OpenLineageLifecycleStateChangeDatasetFacet",
    "OpenLineageDatasetPreviousIdentifier",
    "OpenLineageDatasetLifecycleStateChange",
    "OpenLineageOutputStatisticsOutputDatasetFacet",
    "OpenLineageSchemaDatasetFacet",
    "OpenLineageSchemaField",
    "OpenLineageStorageDatasetFacet",
    "OpenLineageSymlinksDatasetFacet",
    "OpenLineageSymlinkType",
    "OpenLineageSymlinkIdentifier",
    "OpenLineageDatasetFacetsDict",
    "OpenLineageInputDatasetFacetsDict",
    "OpenLineageOutputDatasetFacetsDict",
]
|
||
|
||
class OpenLineageDatasetFacetsDict(TypedDict, total=False):
    """Facets that may be attached to a dataset.

    Declared with ``total=False``, so every key is optional.
    See [Dataset](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """

    documentation: OpenLineageDocumentationDatasetFacet
    dataQualityMetrics: OpenLineageDataQualityMetricsInputDatasetFacet
    dataSource: OpenLineageDatasourceDatasetFacet
    lifecycleStateChange: OpenLineageLifecycleStateChangeDatasetFacet
    outputStatistics: OpenLineageOutputStatisticsOutputDatasetFacet
    schema: OpenLineageSchemaDatasetFacet
    storage: OpenLineageStorageDatasetFacet
    symlinks: OpenLineageSymlinksDatasetFacet
|
||
|
||
class OpenLineageInputDatasetFacetsDict(TypedDict, total=False):
    """Facets that may be attached to an input dataset.

    Declared with ``total=False``, so every key is optional.
    See [InputDataset](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """

    dataQualityMetrics: OpenLineageDataQualityMetricsInputDatasetFacet
|
||
|
||
class OpenLineageOutputDatasetFacetsDict(TypedDict, total=False):
    """All possible output dataset facets.

    Declared with ``total=False``, so every key is optional.
    See [OutputDataset](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """

    outputStatistics: OpenLineageOutputStatisticsOutputDatasetFacet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from data_rentgen.consumer.openlineage.base import OpenLineageBase | ||
|
||
|
||
class OpenLineageDatasetFacet(OpenLineageBase):
    """Common parent of every dataset facet model; declares no fields itself.

    See [DatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """
|
||
|
||
class OpenLineageInputDatasetFacet(OpenLineageDatasetFacet):
    """Common parent of facets specific to input datasets; declares no fields itself.

    See [InputDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """
|
||
|
||
class OpenLineageOutputDatasetFacet(OpenLineageDatasetFacet):
    """Common parent of facets specific to output datasets; declares no fields itself.

    See [OutputDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/OpenLineage.json).
    """
20 changes: 20 additions & 0 deletions
20
data_rentgen/consumer/openlineage/dataset_facets/dataquality_metrics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from pydantic import Field | ||
|
||
from data_rentgen.consumer.openlineage.dataset_facets.base import (
    OpenLineageInputDatasetFacet,
    OpenLineageOutputDatasetFacet,
)
|
||
|
||
class OpenLineageDataQualityMetricsInputDatasetFacet(OpenLineageInputDatasetFacet):
    """Input dataset facet describing data quality metrics.

    This is an *input* facet, so it derives from the input facet base class
    rather than the output one.
    See [DataQualityMetricsInputDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/DataQualityMetricsInputDatasetFacet.json).
    Note: `columnMetrics` are ignored.
    """

    # Populated from the ``rowCount`` key of the incoming facet.
    rows: int | None = Field(default=None, alias="rowCount")
    # No alias: the incoming key is ``bytes`` itself.
    bytes: int | None = None
    # Populated from the ``fileCount`` key of the incoming facet.
    files: int | None = Field(default=None, alias="fileCount")
15 changes: 15 additions & 0 deletions
15
data_rentgen/consumer/openlineage/dataset_facets/datasource.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageDatasetFacet, | ||
) | ||
|
||
|
||
class OpenLineageDatasourceDatasetFacet(OpenLineageDatasetFacet):
    """Facet carrying the name and URI of a dataset's data source.

    Both fields are required.
    See [DatasourceDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/DatasourceDatasetFacet.json).
    """

    name: str
    uri: str
14 changes: 14 additions & 0 deletions
14
data_rentgen/consumer/openlineage/dataset_facets/documentation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageDatasetFacet, | ||
) | ||
|
||
|
||
class OpenLineageDocumentationDatasetFacet(OpenLineageDatasetFacet):
    """Facet carrying a textual description of a dataset.

    See [DocumentationDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/DocumentationDatasetFacet.json).
    """

    description: str
43 changes: 43 additions & 0 deletions
43
data_rentgen/consumer/openlineage/dataset_facets/lifecycle_change.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from enum import Enum | ||
|
||
from data_rentgen.consumer.openlineage.base import OpenLineageBase | ||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageDatasetFacet, | ||
) | ||
|
||
|
||
class OpenLineageDatasetLifecycleStateChange(str, Enum):
    """Kinds of lifecycle state change a dataset can undergo.

    Members are also plain strings, and ``str(member)`` yields the raw value.
    See [LifecycleStateChangeDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/LifecycleStateChangeDatasetFacet.json).
    """

    ALTER = "ALTER"
    CREATE = "CREATE"
    DROP = "DROP"
    OVERWRITE = "OVERWRITE"
    RENAME = "RENAME"
    TRUNCATE = "TRUNCATE"

    def __str__(self) -> str:
        # Render the bare value rather than the enum's default representation.
        raw: str = self.value
        return raw
|
||
|
||
class OpenLineageDatasetPreviousIdentifier(OpenLineageBase):
    """Namespace and name a dataset had before it was renamed.

    Used only if `lifecycleStateChange=RENAME`.
    See [LifecycleStateChangeDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/LifecycleStateChangeDatasetFacet.json).
    """

    namespace: str
    name: str
|
||
|
||
class OpenLineageLifecycleStateChangeDatasetFacet(OpenLineageDatasetFacet):
    """Facet recording a lifecycle state change of a dataset.

    See [LifecycleStateChangeDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/LifecycleStateChangeDatasetFacet.json).
    """

    lifecycleStateChange: OpenLineageDatasetLifecycleStateChange
    # Optional; defaults to None when the facet carries no previous identifier.
    previousIdentifier: OpenLineageDatasetPreviousIdentifier | None = None
18 changes: 18 additions & 0 deletions
18
data_rentgen/consumer/openlineage/dataset_facets/output_statistics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from pydantic import Field | ||
|
||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageOutputDatasetFacet, | ||
) | ||
|
||
|
||
class OpenLineageOutputStatisticsOutputDatasetFacet(OpenLineageOutputDatasetFacet):
    """Output dataset facet with row, byte and file statistics.

    All three fields are optional and default to None.
    See [OutputStatisticsOutputDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/OutputStatisticsOutputDatasetFacet.json).
    """

    # Populated from the ``rowCount`` key of the incoming facet.
    rows: int | None = Field(default=None, alias="rowCount")
    # Populated from the ``size`` key of the incoming facet.
    bytes: int | None = Field(default=None, alias="size")
    # Populated from the ``fileCount`` key of the incoming facet.
    files: int | None = Field(default=None, alias="fileCount")
28 changes: 28 additions & 0 deletions
28
data_rentgen/consumer/openlineage/dataset_facets/schema.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from __future__ import annotations | ||
|
||
from data_rentgen.consumer.openlineage.base import OpenLineageBase | ||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageDatasetFacet, | ||
) | ||
|
||
|
||
class OpenLineageSchemaField(OpenLineageBase):
    """One field of a dataset schema; may contain nested fields.

    Only ``name`` is required, the remaining attributes default to None.
    See [SchemaDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/SchemaDatasetFacet.json).
    """

    name: str
    type: str | None = None
    description: str | None = None
    # Self-referencing annotation: nested fields use this same model.
    fields: list[OpenLineageSchemaField] | None = None
|
||
|
||
class OpenLineageSchemaDatasetFacet(OpenLineageDatasetFacet):
    """Facet holding the list of fields that make up a dataset schema.

    See [SchemaDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/SchemaDatasetFacet.json).
    """

    # Required: no default is declared for the field list.
    fields: list[OpenLineageSchemaField]
15 changes: 15 additions & 0 deletions
15
data_rentgen/consumer/openlineage/dataset_facets/storage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# SPDX-FileCopyrightText: 2024 MTS PJSC | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from data_rentgen.consumer.openlineage.dataset_facets.base import ( | ||
OpenLineageDatasetFacet, | ||
) | ||
|
||
|
||
class OpenLineageStorageDatasetFacet(OpenLineageDatasetFacet):
    """Dataset facet describing storage information.

    See [StorageDatasetFacet](https://github.com/OpenLineage/OpenLineage/blob/main/spec/facets/StorageDatasetFacet.json).
    """

    storageLayer: str
    # NOTE(review): the OpenLineage spec is understood to require only
    # ``storageLayer``; ``fileFormat`` is made optional so that events omitting
    # it still pass validation. Confirm against the linked schema.
    fileFormat: str | None = None
Oops, something went wrong.