From 9c7c75dfe72408be19ef6be1c3c00b1a6d9f9f48 Mon Sep 17 00:00:00 2001
From: tonyseale
Date: Fri, 5 Jan 2024 14:50:20 +0000
Subject: [PATCH] Update DPROD spec: drop DataService subtypes, add shape classes and descriptive properties
---
dprod.html | 134 ++++++++++++++++++++-------------------------------
dprod.jsonld | 71 ++++++++++-----------------
2 files changed, 77 insertions(+), 128 deletions(-)
diff --git a/dprod.html b/dprod.html
index 11f6ccc..a0b29ac 100644
--- a/dprod.html
+++ b/dprod.html
@@ -38,15 +38,18 @@
Data Product Vocabulary (DPROD)
- The Data Mesh is an architectural and organizational paradigm that views data as a product, emphasizing domain-oriented decentralized data ownership and architecture. The Data Catalog (DCAT) Vocabulary is a W3C standard that allows publishers to describe datasets and data services in a decentralized way.
- The Data Product (DPROD) specification defines a profile of DCAT, extending it to describe Data Products. DPROD follows two basic principles:
+ The Data Mesh is an architectural and organizational paradigm that views data as a product, emphasizing domain-oriented decentralized data ownership and architecture. The Data Catalog (DCAT) Vocabulary is a W3C standard that allows publishers to describe datasets and data services in a decentralized way.
+ The Data Product (DPROD) specification defines a profile of DCAT, extending it to describe Data Products.
+ DPROD follows two basic principles:
🔵 Decentralize Data Ownership: Efficiency in data integration necessitates task distribution among multiple teams. DCAT facilitates this by providing a standardised approach for decentralized dataset publication.
🔵 Harmonize Data Schemas: Shared ontologies can be used to harmonize decentralize schemas to consistent semantics. For example this shared DPROD ontology provides the semantics for defining what constitutes a Data Product.
- The DPROD specification extends DCAT by linking DCAT Data Services to DPROD Data Products. This enables a decentralized approach to publishing Data Products, facilitating federated searches for products across distributed sites using the same query mechanism and structure. The DPROD specification has four main aims:
+
+ The DPROD specification extends DCAT by linking DCAT Data Services to DPROD Data Products. This enables a decentralized approach to publishing Data Products, facilitating federated searches for products across distributed sites using the same query mechanism and structure.
+ The DPROD specification has four main aims:
🔵 To provide unambiguous and sharable semantics to answer the question: 'What is a data product?'
@@ -147,8 +150,8 @@ Data Product (DPROD) Model
Data Mesh (dcat:Catalog
) - The collection of Data Products
Data Product (dprod:DataProduct
) - A data product may have input and output ports, code and metadata
Port (dcat:DataService
) - A digital interface that provides access to a Dataset. The can be a HTTP URL, a Database or a FileShare etc
- Distribution (dcat:Distribution
) - A specific representation of a dataset (CSV, JSON, ADLS etc) with it own physical mode if needed
- Dataset (dcat:Dataset
) - A collection of data related that conforms to a logical model
+ Distribution (dcat:Distribution
) - A specific representation of a dataset (CSV, JSON, ADLS, etc.) which can conform to a physical model
+ Dataset (dcat:Dataset
) - A collection of related data that can conform to a logical model
@@ -165,7 +168,7 @@
Data Product (DPROD) Model
"dataProductOwner": "https://www.linkedin.com/in/tonyseale/",
"lifecycle" : "Consume",
"outputPort": {
- "@type": "RESTDataService",
+ "@type": "dcat:DataService",
"dcat:endpointURL": "https://y.com/uk-10-year-bonds",
"offersDistribution": {
"@type": "dcat:Distribution",
@@ -192,111 +195,82 @@ DataProduct
A data product is a rational, managed, and governed collection of data, with purpose, value and ownership, meeting consumer needs over a planned life-cycle.
- dataProductOwner
+ lifecycle
- Identifier: | dprod:dataProductOwner |
+ Identifier: | dprod:lifecycle |
- Label: | Data Product Owner |
+ Label: | lifecycleStatus |
- Notes: | The Agent that is overall accountable for the data product. This includes managing the data product along its lifecycle ( creation, usage, versioning, deletion). This can be different from the creator or the publisher of the Data Product |
+ Notes: | The lifecycle status of the Data Product, taken from a controlled list (Ideation, Design, Build, Deploy, Consume). |
Domain: | https://ekgf.github.io/data-product-spec/dprod/DataProduct |
- Range: | http://purl.org/dc/terms/Agent |
+ Range: | |
- inputPort
+ purpose
- Identifier: | dprod:inputPort |
+ Identifier: | dprod:purpose |
+
+ Label: | |
- Notes: | an input port describes a set of services exposed by a data product to collect its source data and makes it available for further internal transformation. An input port can receive data from one or more upstream sources in a push (i.e. asynchronous subscription) or pop mode (i.e. synchronous query). Each data product may have one or more input ports |
+ Notes: | A description of the objectives and intended usage of the data product. |
Domain: | https://ekgf.github.io/data-product-spec/dprod/DataProduct |
- Range: | http://www.w3.org/ns/dcat#DataService |
+ Range: | http://www.w3.org/2001/XMLSchema#string |
- outputPort
+ productionProcessDescription
- Identifier: | dprod:outputPort |
+ Identifier: | dprod:productionProcessDescription |
+
+ Label: | |
- Notes: | an output port describes a set of services exposed by a data product to share the generated data in a way that can be understood and trusted. Each data product must have at least one or more output ports |
+ Notes: | A description of how the data comprising the data product is gathered, refined, or managed. |
Domain: | https://ekgf.github.io/data-product-spec/dprod/DataProduct |
- Range: | http://www.w3.org/ns/dcat#DataService |
+ Range: | http://www.w3.org/2001/XMLSchema#string |
- lifecycle
+ domain
- Identifier: | dprod:lifecycle |
+ Identifier: | dprod:domain |
+
+ Label: | |
- Notes: | The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ). |
+ Notes: | The business or information area supported by the data product. |
Domain: | https://ekgf.github.io/data-product-spec/dprod/DataProduct |
- Range: | http://www.w3.org/ns/dcat#DataProductLifecycle |
+ Range: | |
-
-
-
- Distribution
- see http://www.w3.org/ns/dcat#Distribution
-
- belongsToDataset
+ port
- Identifier: | dprod:belongsToDataset |
-
- Notes: | The dataset that this distribution makes available |
- Domain: | http://www.w3.org/ns/dcat#Distribution |
- Range: | http://www.w3.org/ns/dcat#Dataset |
-
-
-
+ Identifier: | dprod:port |
-
-
-
- DataService
- None
-
-
- offersDistribution
-
-
- Identifier: | dprod:offersDistribution |
+ Label: | port |
- Notes: | The dataset distribution that is being offered through this Data Service |
- Domain: | http://www.w3.org/ns/dcat#DataService |
- Range: | http://www.w3.org/ns/dcat#Distribution |
+ Notes: | A port describes a set of services exposed by a data product. |
+ Domain: | https://ekgf.github.io/data-product-spec/dprod/DataProduct |
+ Range: | http://www.w3.org/ns/dcat#DataService |
-
- DatabaseDataService
- Uses database-like access methods, including query e.g. JDBC, ODBC, SPARQL endpoint
-
-
-
-
- Enumeration
- The superclass of enumeration lists referenced from Data Product related artifacts
-
-
-
DataProductLifecycleStatus
The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ).
@@ -304,50 +278,44 @@ DataProductLifecycleStatus
- FileDataService
- Uses file-like access methods. May or may not be streaming if the file is continuously wriitten to
+ DataProductShape
+ A data product is a rational, managed, and governed collection of data, with purpose, value and ownership, meeting consumer needs over a planned life-cycle.
- CallbackDataService
- Streams by making calls to a client-provided e.g. WebSockets
+ Protocol
+ A protocol, possibly including a specific version, used for communicating with a service
- GraphQLDataService
- Single REST endpoint, with structure given by GraphQL schema
+ Enumeration
+ The superclass of enumeration lists referenced from Data Product related artifacts
- QueuingDataService
- Streams using a queue or topic e.g. MQTT, Kafka, DDS
+ SecuritySchemaType
+ A security schema type used for authentication and communication.
- ObjectDataService
- Structured API, e.g. gRPC, CORBA, SOAP, ORM
+ DataServiceShape
-
-
-
- StreamingDataService
- Data is continuously made available
- RESTDataService
- Accessed using http verbs with parameters, may be defined using OpenAPI
+ DatasetShape
+
- Dataset
- None
+ DistributionShape
+
diff --git a/dprod.jsonld b/dprod.jsonld
index 2b189c6..b6fe473 100644
--- a/dprod.jsonld
+++ b/dprod.jsonld
@@ -8,37 +8,31 @@
"dcat": "http://www.w3.org/ns/dcat#",
"dcterms": "http://purl.org/dc/terms/",
"sh": "http://www.w3.org/ns/shacl#",
+ "DistributionShape": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/DistributionShape"
+ },
+ "Protocol": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/Protocol"
+ },
"DataProduct": {
"@id": "https://ekgf.github.io/data-product-spec/dprod/DataProduct"
},
- "dataProductOwner": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/dataProductOwner",
- "@type": "http://purl.org/dc/terms/Agent"
+ "SecuritySchemaType": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/SecuritySchemaType"
},
- "belongsToDataset": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/belongsToDataset",
- "@type": "http://www.w3.org/ns/dcat#Dataset"
+ "purpose": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/purpose",
+ "@type": "http://www.w3.org/2001/XMLSchema#string"
},
- "lifecycle": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/lifecycle",
- "@type": "http://www.w3.org/ns/dcat#DataProductLifecycle"
+ "DataProductShape": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/DataProductShape"
},
"offersDistribution": {
"@id": "https://ekgf.github.io/data-product-spec/dprod/offersDistribution",
"@type": "http://www.w3.org/ns/dcat#Distribution"
},
- "inputPort": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/inputPort",
- "@type": "http://www.w3.org/ns/dcat#DataService"
- },
- "GraphQLDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/GraphQLDataService"
- },
- "FileDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/FileDataService"
- },
- "DatabaseDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/DatabaseDataService"
+ "DatasetShape": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/DatasetShape"
},
"Enumeration": {
"@id": "https://ekgf.github.io/data-product-spec/dprod/Enumeration"
@@ -46,33 +40,20 @@
"DataProductLifecycleStatus": {
"@id": "https://ekgf.github.io/data-product-spec/dprod/DataProductLifecycleStatus"
},
- "outputPort": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/outputPort",
- "@type": "http://www.w3.org/ns/dcat#DataService"
- },
- "DataService": {
- "@id": "http://www.w3.org/ns/dcat#DataService"
- },
- "RESTDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/RESTDataService"
- },
- "ObjectDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/ObjectDataService"
- },
- "StreamingDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/StreamingDataService"
- },
- "CallbackDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/CallbackDataService"
+ "belongsToDataset": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/belongsToDataset",
+ "@type": "http://www.w3.org/ns/dcat#Dataset"
},
- "QueuingDataService": {
- "@id": "https://ekgf.github.io/data-product-spec/dprod/QueuingDataService"
+ "productionProcessDescription": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/productionProcessDescription",
+ "@type": "http://www.w3.org/2001/XMLSchema#string"
},
- "Distribution": {
- "@id": "http://www.w3.org/ns/dcat#Distribution"
+ "DataServiceShape": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/DataServiceShape"
},
- "Dataset": {
- "@id": "http://www.w3.org/ns/dcat#Dataset"
+ "dataProductOwner": {
+ "@id": "https://ekgf.github.io/data-product-spec/dprod/dataProductOwner",
+ "@type": "http://xmlns.com/foaf/0.1/Agent"
}
}
}
\ No newline at end of file