From 9c7c75dfe72408be19ef6be1c3c00b1a6d9f9f48 Mon Sep 17 00:00:00 2001 From: tonyseale Date: Fri, 5 Jan 2024 14:50:20 +0000 Subject: [PATCH] update --- dprod.html | 134 ++++++++++++++++++++------------------------------- dprod.jsonld | 71 ++++++++++----------------- 2 files changed, 77 insertions(+), 128 deletions(-) diff --git a/dprod.html b/dprod.html index 11f6ccc..a0b29ac 100644 --- a/dprod.html +++ b/dprod.html @@ -38,15 +38,18 @@

Data Product Vocabulary (DPROD)

- The Data Mesh is an architectural and organizational paradigm that views data as a product, emphasizing domain-oriented decentralized data ownership and architecture. The Data Catalog (DCAT) Vocabulary is a W3C standard that allows publishers to describe datasets and data services in a decentralized way.

-

The Data Product (DPROD) specification defines a profile of DCAT, extending it to describe Data Products. DPROD follows two basic principles:

+ The Data Mesh is an architectural and organizational paradigm that views data as a product, emphasizing domain-oriented decentralized data ownership and architecture. The Data Catalog (DCAT) Vocabulary is a W3C standard that allows publishers to describe datasets and data services in a decentralized way. + The Data Product (DPROD) specification defines a profile of DCAT, extending it to describe Data Products.

+

DPROD follows two basic principles:

🔵 Decentralize Data Ownership: Efficiency in data integration necessitates task distribution among multiple teams. DCAT facilitates this by providing a standardized approach for decentralized dataset publication.

🔵 Harmonize Data Schemas: Shared ontologies can be used to harmonize decentralized schemas into consistent semantics. For example, this shared DPROD ontology provides the semantics for defining what constitutes a Data Product.

- The DPROD specification extends DCAT by linking DCAT Data Services to DPROD Data Products. This enables a decentralized approach to publishing Data Products, facilitating federated searches for products across distributed sites using the same query mechanism and structure. The DPROD specification has four main aims: +
+ The DPROD specification extends DCAT by linking DCAT Data Services to DPROD Data Products. This enables a decentralized approach to publishing Data Products, facilitating federated searches for products across distributed sites using the same query mechanism and structure. +

The DPROD specification has four main aims:

🔵 To provide unambiguous and sharable semantics to answer the question: 'What is a data product?'

@@ -147,8 +150,8 @@

Data Product (DPROD) Model

  • Data Mesh (dcat:Catalog) - The collection of Data Products
  • Data Product (dprod:DataProduct) - A data product may have input and output ports, code and metadata
  • Port (dcat:DataService) - A digital interface that provides access to a Dataset. This can be an HTTP URL, a Database, a FileShare, etc.
  • -
  • Distribution (dcat:Distribution) - A specific representation of a dataset (CSV, JSON, ADLS etc) with it own physical mode if needed
  • -
  • Dataset (dcat:Dataset) - A collection of data related that conforms to a logical model
  • +
  • Distribution (dcat:Distribution) - A specific representation of a dataset (CSV, JSON, ADLS etc) which can conform to a physical model
  • +
  • Dataset (dcat:Dataset) - A collection of related data that can conform to a logical model
  • @@ -165,7 +168,7 @@

    Data Product (DPROD) Model

    "dataProductOwner": "https://www.linkedin.com/in/tonyseale/", "lifecycle" : "Consume", "outputPort": { - "@type": "RESTDataService", + "@type": "dcat:DataService", "dcat:endpointURL": "https://y.com/uk-10-year-bonds", "offersDistribution": { "@type": "dcat:Distribution", @@ -192,111 +195,82 @@

    DataProduct

    A data product is a rational, managed, and governed collection of data, with purpose, value and ownership, meeting consumer needs over a planned life-cycle.
    -

    dataProductOwner

    +

    lifecycle

    - + - + - + - +
    Identifier: dprod:dataProductOwner
    Identifier: dprod:lifecycle
    Label:Data Product Owner
    Label:lifecycleStatus
    Notes:The Agent that is overall accountable for the data product. This includes managing the data product along its lifecycle ( creation, usage, versioning, deletion). This can be different from the creator or the publisher of the Data Product
    Notes:The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ).
    Domain:https://ekgf.github.io/data-product-spec/dprod/DataProduct
    Range:http://purl.org/dc/terms/Agent
    Range:
    -

    inputPort

    +

    purpose

    - + + + - + - +
    Identifier: dprod:inputPort
    Identifier: dprod:purpose
    Label:
    Notes:an input port describes a set of services exposed by a data product to collect its source data and makes it available for further internal transformation. An input port can receive data from one or more upstream sources in a push (i.e. asynchronous subscription) or pop mode (i.e. synchronous query). Each data product may have one or more input ports
    Notes:A description of the objectives and intended usage of the data product.
    Domain:https://ekgf.github.io/data-product-spec/dprod/DataProduct
    Range:http://www.w3.org/ns/dcat#DataService
    Range:http://www.w3.org/2001/XMLSchema#string
    -

    outputPort

    +

    productionProcessDescription

    - + + + - + - +
    Identifier: dprod:outputPort
    Identifier: dprod:productionProcessDescription
    Label:
    Notes:an output port describes a set of services exposed by a data product to share the generated data in a way that can be understood and trusted. Each data product must have at least one or more output ports
    Notes:A description of how the data comprising the data product is gathered, refined, or managed.
    Domain:https://ekgf.github.io/data-product-spec/dprod/DataProduct
    Range:http://www.w3.org/ns/dcat#DataService
    Range:http://www.w3.org/2001/XMLSchema#string
    -

    lifecycle

    +

    domain

    - + + + - + - +
    Identifier: dprod:lifecycle
    Identifier: dprod:domain
    Label:
    Notes:The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ).
    Notes:The business or information area supported by the data product.
    Domain:https://ekgf.github.io/data-product-spec/dprod/DataProduct
    Range:http://www.w3.org/ns/dcat#DataProductLifecycle
    Range:
    -
    - -
    -

    Distribution

    - see http://www.w3.org/ns/dcat#Distribution -
    -

    belongsToDataset

    +

    port

    - - - - - - -
    Identifier: dprod:belongsToDataset
    Notes:The dataset that this distribution makes available
    Domain:http://www.w3.org/ns/dcat#Distribution
    Range:http://www.w3.org/ns/dcat#Dataset
    -
    + Identifier: dprod:port -
    - -
    -

    DataService

    - None - -
    -

    offersDistribution

    - - - + - - - + + +
    Identifier: dprod:offersDistribution
    Label:port
    Notes:The dataset distribution that is being offered through this Data Service
    Domain:http://www.w3.org/ns/dcat#DataService
    Range:http://www.w3.org/ns/dcat#Distribution
    Notes:A port describes a set of services exposed by a data product.
    Domain:https://ekgf.github.io/data-product-spec/dprod/DataProduct
    Range:http://www.w3.org/ns/dcat#DataService
    -
    -

    DatabaseDataService

    - Uses database-like access methods, including query e.g. JDBC, ODBC, SPARQL endpoint - -
    - -
    -

    Enumeration

    - The superclass of enumeration lists referenced from Data Product related artifacts - -
    -

    DataProductLifecycleStatus

    The lifecycle status of the Data Product taken from a control list ( Ideation, Design, Build, Deploy, Consume ). @@ -304,50 +278,44 @@

    DataProductLifecycleStatus

    -

    FileDataService

    - Uses file-like access methods. May or may not be streaming if the file is continuously wriitten to +

    DataProductShape

    + A data product is a rational, managed, and governed collection of data, with purpose, value and ownership, meeting consumer needs over a planned life-cycle.
    -

    CallbackDataService

    - Streams by making calls to a client-provided e.g. WebSockets +

    Protocol

    + A protocol, possibly including a specific version, used for communicating with a service
    -

    GraphQLDataService

    - Single REST endpoint, with structure given by GraphQL schema +

    Enumeration

    + The superclass of enumeration lists referenced from Data Product related artifacts
    -

    QueuingDataService

    - Streams using a queue or topic e.g. MQTT, Kafka, DDS +

    SecuritySchemaType

    + A security schema type used for authentication and communication.
    -

    ObjectDataService

    - Structured API, e.g. gRPC, CORBA, SOAP, ORM +

    DataServiceShape

    -
    - -
    -

    StreamingDataService

    - Data is continuously made available
    -

    RESTDataService

    - Accessed using http verbs with parameters, may be defined using OpenAPI +

    DatasetShape

    +
    -

    Dataset

    - None +

    DistributionShape

    +
    diff --git a/dprod.jsonld b/dprod.jsonld index 2b189c6..b6fe473 100644 --- a/dprod.jsonld +++ b/dprod.jsonld @@ -8,37 +8,31 @@ "dcat": "http://www.w3.org/ns/dcat#", "dcterms": "http://purl.org/dc/terms/", "sh": "http://www.w3.org/ns/shacl#", + "DistributionShape": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/DistributionShape" + }, + "Protocol": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/Protocol" + }, "DataProduct": { "@id": "https://ekgf.github.io/data-product-spec/dprod/DataProduct" }, - "dataProductOwner": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/dataProductOwner", - "@type": "http://purl.org/dc/terms/Agent" + "SecuritySchemaType": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/SecuritySchemaType" }, - "belongsToDataset": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/belongsToDataset", - "@type": "http://www.w3.org/ns/dcat#Dataset" + "purpose": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/purpose", + "@type": "http://www.w3.org/2001/XMLSchema#string" }, - "lifecycle": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/lifecycle", - "@type": "http://www.w3.org/ns/dcat#DataProductLifecycle" + "DataProductShape": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/DataProductShape" }, "offersDistribution": { "@id": "https://ekgf.github.io/data-product-spec/dprod/offersDistribution", "@type": "http://www.w3.org/ns/dcat#Distribution" }, - "inputPort": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/inputPort", - "@type": "http://www.w3.org/ns/dcat#DataService" - }, - "GraphQLDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/GraphQLDataService" - }, - "FileDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/FileDataService" - }, - "DatabaseDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/DatabaseDataService" + "DatasetShape": { + "@id": 
"https://ekgf.github.io/data-product-spec/dprod/DatasetShape" }, "Enumeration": { "@id": "https://ekgf.github.io/data-product-spec/dprod/Enumeration" @@ -46,33 +40,20 @@ "DataProductLifecycleStatus": { "@id": "https://ekgf.github.io/data-product-spec/dprod/DataProductLifecycleStatus" }, - "outputPort": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/outputPort", - "@type": "http://www.w3.org/ns/dcat#DataService" - }, - "DataService": { - "@id": "http://www.w3.org/ns/dcat#DataService" - }, - "RESTDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/RESTDataService" - }, - "ObjectDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/ObjectDataService" - }, - "StreamingDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/StreamingDataService" - }, - "CallbackDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/CallbackDataService" + "belongsToDataset": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/belongsToDataset", + "@type": "http://www.w3.org/ns/dcat#Dataset" }, - "QueuingDataService": { - "@id": "https://ekgf.github.io/data-product-spec/dprod/QueuingDataService" + "productionProcessDescription": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/productionProcessDescription", + "@type": "http://www.w3.org/2001/XMLSchema#string" }, - "Distribution": { - "@id": "http://www.w3.org/ns/dcat#Distribution" + "DataServiceShape": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/DataServiceShape" }, - "Dataset": { - "@id": "http://www.w3.org/ns/dcat#Dataset" + "dataProductOwner": { + "@id": "https://ekgf.github.io/data-product-spec/dprod/dataProductOwner", + "@type": "http://xmlns.com/foaf/0.1/Agent" } } } \ No newline at end of file