Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
05595a7
Initial submission of the RDF ingestion source
stephengoldbaum Dec 1, 2025
2c009bc
feat(ingestion): Implement deferred processing for structured propert…
stephengoldbaum Dec 1, 2025
1e874d2
feat(ingestion): Enhance entity processing with dependency management
stephengoldbaum Dec 1, 2025
b8fd803
Merge branch 'datahub-project:master' into master
stephengoldbaum Dec 3, 2025
9dc4066
Refactored to eliminate everything but glossaries.
stephengoldbaum Dec 4, 2025
50bd26a
feat(ingestion): add comprehensive capability summary for various dat…
stephengoldbaum Dec 4, 2025
8579f1d
refactor(entities): remove DomainMCPBuilder and clarify domain handling
stephengoldbaum Dec 4, 2025
aa46687
Merge branch 'datahub-project:master' into master
stephengoldbaum Dec 4, 2025
48913ef
chore(autogenerated): remove obsolete capability summary and lineage …
stephengoldbaum Dec 4, 2025
e78fc09
cleanup
stephengoldbaum Dec 4, 2025
c632ff9
removed non-ingestion source code
stephengoldbaum Dec 4, 2025
b42eb70
refactor(ingestion): remove query and target factory components
stephengoldbaum Dec 4, 2025
e1959b7
delete(rdf): remove RDF README and SHACL migration guide files
stephengoldbaum Dec 4, 2025
fe51da8
Added from UI for RDF ingestion
stephengoldbaum Dec 4, 2025
079853e
fix(docs): update RDF specification links for entity-specific documents
stephengoldbaum Dec 4, 2025
0c89279
Merge branch 'master' into master
stephengoldbaum Dec 4, 2025
79f9ea9
refactor(constants): reorganize RDF imports and update tooltip format…
stephengoldbaum Dec 4, 2025
1255ffa
refactor(rdf): enhance type annotations and improve error handling
stephengoldbaum Dec 4, 2025
c4f33c5
refactor(rdf): restructure target interface and remove unused components
stephengoldbaum Dec 6, 2025
e90c241
Merge branch 'master' into master
stephengoldbaum Dec 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,691 changes: 3,691 additions & 0 deletions autogenerated/capability_summary.json

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -371,5 +371,13 @@
"description": "Import Charts and Dashboards from Preset",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/preset/",
"recipe": "source:\n type: preset\n config:\n # Coordinates\n connect_uri: Preset workspace URL\n manager_uri: https://api.app.preset.io\n\n # Credentials\n api_key: Preset API Key\n api_secret: Preset API Secret"
},
{
"urn": "urn:li:dataPlatform:rdf",
"name": "rdf",
"displayName": "RDF",
"description": "Import glossary terms, term groups, and relationships from RDF/OWL ontologies (SKOS, Turtle, RDF/XML).",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/rdf",
"recipe": "source:\n type: rdf\n config:\n source: path/to/glossary.ttl\n environment: PROD\n export_only:\n - glossary"
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,14 @@ import {
PRESTO_PASSWORD,
PRESTO_USERNAME,
} from '@app/ingestV2/source/builder/RecipeForm/presto';
import {
RDF_DIALECT,
RDF_ENVIRONMENT,
RDF_EXTENSIONS,
RDF_FORMAT,
RDF_RECURSIVE,
RDF_SOURCE,
} from '@app/ingestV2/source/builder/RecipeForm/rdf';
import {
REDSHIFT_DATABASE,
REDSHIFT_HOST_PORT,
Expand Down Expand Up @@ -239,6 +247,7 @@ import {
MYSQL,
OKTA,
POWER_BI,
RDF,
SAC,
VERTICA,
} from '@app/ingestV2/source/builder/constants';
Expand Down Expand Up @@ -586,6 +595,13 @@ export const RECIPE_FIELDS: RecipeFields = {
],
advancedFields: [STATEFUL_INGESTION_ENABLED],
},
[RDF]: {
fields: [RDF_SOURCE],
filterFields: [],
advancedFields: [RDF_FORMAT, RDF_EXTENSIONS, RDF_RECURSIVE, RDF_ENVIRONMENT, RDF_DIALECT],
connectionSectionTooltip: 'Configure the RDF source location and basic settings.',
advancedSectionTooltip: 'Advanced options for RDF format, file processing, and dialect selection.',
},
};

export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS));
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { FieldType, RecipeField } from '@app/ingestV2/source/builder/RecipeForm/common';

/**
 * Form field describing where the RDF input comes from.
 *
 * A required free-text field bound to the recipe path `source.config.source`.
 * The tooltip and placeholder list the accepted shapes: a file path, a folder
 * path, a server URL, or a comma-separated list of files.
 */
export const RDF_SOURCE: RecipeField = {
    name: 'source',
    label: 'Source',
    tooltip:
        'Source to process: file path, folder path, server URL, or comma-separated files. Examples: /path/to/file.ttl, /path/to/folder, https://example.com/data.ttl, file1.ttl,file2.ttl',
    type: FieldType.TEXT,
    fieldPath: 'source.config.source',
    placeholder: '/path/to/file.ttl or /path/to/folder or https://example.com/data.ttl',
    required: true,
    // No additional validation rules are attached to this field.
    rules: null,
};

/**
 * Form field for choosing the RDF serialization format.
 *
 * A select bound to the recipe path `source.config.format`. The first option
 * carries an empty-string value and is labeled "Auto-detect"; the remaining
 * options map display labels to the format identifiers written to the recipe.
 */
export const RDF_FORMAT: RecipeField = {
    name: 'format',
    label: 'RDF Format',
    tooltip: 'RDF format (auto-detected if not specified). Examples: turtle, xml, n3, nt, json-ld',
    type: FieldType.SELECT,
    fieldPath: 'source.config.format',
    placeholder: 'Auto-detect',
    options: [
        // Empty value stands for "no explicit format chosen".
        { label: 'Auto-detect', value: '' },
        { label: 'Turtle', value: 'turtle' },
        { label: 'RDF/XML', value: 'xml' },
        { label: 'N3', value: 'n3' },
        { label: 'N-Triples', value: 'nt' },
        { label: 'JSON-LD', value: 'json-ld' },
    ],
    rules: null,
};

/**
 * Form field for the file extensions to pick up when the source is a folder.
 *
 * A list field bound to the recipe path `source.config.extensions`; entries
 * are added through a button labeled "Add extension". The tooltip documents
 * the default set (.ttl, .rdf, .owl, .n3, .nt).
 */
export const RDF_EXTENSIONS: RecipeField = {
    name: 'extensions',
    label: 'File Extensions',
    tooltip: 'File extensions to process when source is a folder. Default: .ttl, .rdf, .owl, .n3, .nt',
    type: FieldType.LIST,
    fieldPath: 'source.config.extensions',
    placeholder: '.ttl',
    buttonLabel: 'Add extension',
    rules: null,
};

/**
 * Form field toggling recursive traversal when the source is a folder.
 *
 * A boolean field bound to the recipe path `source.config.recursive`. Per the
 * tooltip, recursion is enabled by default.
 */
export const RDF_RECURSIVE: RecipeField = {
    name: 'recursive',
    label: 'Recursive Folder Processing',
    tooltip: 'Enable recursive folder processing when source is a folder (default: true)',
    type: FieldType.BOOLEAN,
    fieldPath: 'source.config.recursive',
    rules: null,
};

/**
 * Form field for the DataHub environment written to the recipe.
 *
 * A select bound to the recipe path `source.config.environment`, offering
 * PROD, DEV, TEST and UAT. Each option's label equals its value.
 */
export const RDF_ENVIRONMENT: RecipeField = {
    name: 'environment',
    label: 'DataHub Environment',
    tooltip: 'DataHub environment (PROD, DEV, TEST, etc.)',
    type: FieldType.SELECT,
    fieldPath: 'source.config.environment',
    placeholder: 'PROD',
    options: [
        { label: 'PROD', value: 'PROD' },
        { label: 'DEV', value: 'DEV' },
        { label: 'TEST', value: 'TEST' },
        { label: 'UAT', value: 'UAT' },
    ],
    rules: null,
};

/**
 * Form field for forcing a specific RDF dialect.
 *
 * A select bound to the recipe path `source.config.dialect`. The first option
 * carries an empty-string value and is labeled "Auto-detect"; the others are
 * `default`, `fibo` and `generic`.
 */
export const RDF_DIALECT: RecipeField = {
    name: 'dialect',
    label: 'RDF Dialect',
    tooltip: 'Force a specific RDF dialect (default: auto-detect). Options: default, fibo, generic',
    type: FieldType.SELECT,
    fieldPath: 'source.config.dialect',
    placeholder: 'Auto-detect',
    options: [
        // Empty value stands for "no dialect forced".
        { label: 'Auto-detect', value: '' },
        { label: 'Default', value: 'default' },
        { label: 'FIBO', value: 'fibo' },
        { label: 'Generic', value: 'generic' },
    ],
    rules: null,
};
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ export const VERTEX_AI = 'vertexai';
export const VERTEXAI_URN = `urn:li:dataPlatform:${VERTEX_AI}`;
export const SNAPLOGIC = 'snaplogic';
export const SNAPLOGIC_URN = `urn:li:dataPlatform:${SNAPLOGIC}`;
// RDF source: platform identifier and the data-platform URN derived from it.
export const RDF = 'rdf';
export const RDF_URN = `urn:li:dataPlatform:${RDF}`;

export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,5 +371,13 @@
"description": "Import Charts and Dashboards from Preset",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/preset/",
"recipe": "source:\n type: preset\n config:\n # Coordinates\n connect_uri: Preset workspace URL\n manager_uri: https://api.app.preset.io\n\n # Credentials\n api_key: Preset API Key\n api_secret: Preset API Secret"
},
{
"urn": "urn:li:dataPlatform:rdf",
"name": "rdf",
"displayName": "RDF",
"description": "Import glossary terms, term groups, and relationships from RDF/OWL ontologies (SKOS, Turtle, RDF/XML).",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/rdf",
"recipe": "source:\n type: rdf\n config:\n source: path/to/glossary.ttl\n environment: PROD\n export_only:\n - glossary"
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -446,5 +446,15 @@
"recipe": "source:\n type: hex\n config:\n workspace_name: # Your Hex Workspace name\n token: # Your PAT or Workspace token",
"category": "Other",
"isPopular": false
},
{
"urn": "urn:li:dataPlatform:rdf",
"name": "rdf",
"displayName": "RDF",
"description": "Import glossary terms, term groups, and relationships from RDF/OWL ontologies (SKOS, Turtle, RDF/XML).",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/rdf",
"recipe": "source:\n type: rdf\n config:\n source: path/to/glossary.ttl\n environment: PROD\n export_only:\n - glossary",
"category": "Other",
"isPopular": false
}
]
4 changes: 2 additions & 2 deletions docker/datahub-actions/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ RUN --mount=type=bind,source=./python-build/version_updater.py,target=/version_u

# Install metadata-ingestion with base extras (network enabled, can install more at runtime)
RUN --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000,id=datahub-actions \
uv pip install -e '/metadata-ingestion/[base,s3,gcs,abs]'
uv pip install -e '/metadata-ingestion/[base,s3,gcs,abs,rdf]'

# Install datahub-actions with all extras
RUN --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000,id=datahub-actions \
Expand Down Expand Up @@ -355,7 +355,7 @@ RUN --mount=type=bind,source=./python-build/version_updater.py,target=/version_u

# Install metadata-ingestion with SLIM extras (no PySpark, network enabled for flexibility)
RUN --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000,id=datahub-actions \
uv pip install -e '/metadata-ingestion/[base,s3-slim,gcs-slim,abs-slim]'
uv pip install -e '/metadata-ingestion/[base,s3-slim,gcs-slim,abs-slim,rdf]'

# Install datahub-actions with all extras
RUN --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000,id=datahub-actions \
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ RUN --mount=type=bind,source=./python-build/version_updater.py,target=/version_u
FROM add-code AS install-slim

RUN --mount=type=cache,target=$HOME/.cache/uv,uid=1000,gid=1000 \
UV_LINK_MODE=copy uv pip install -e "/metadata-ingestion/[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,s3-slim,gcs-slim,abs-slim,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" && \
UV_LINK_MODE=copy uv pip install -e "/metadata-ingestion/[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,s3-slim,gcs-slim,abs-slim,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary,rdf]" && \
datahub --version

FROM add-code AS install-full
Expand Down
Loading
Loading