Skip to content

Commit 63550bd

Browse files
author
kgpayne
authored
Add sources staging model (#5)
* Added sources model and extracted additional columns from manifest json. * Dedupe artifacts, add docs and add dim_dbt__sources
1 parent 413068e commit 63550bd

File tree

7 files changed

+181
-33
lines changed

7 files changed

+181
-33
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22
target/
33
dbt_modules/
44
logs/
5+
6+
.vscode/
7+
Pipfile
8+
Pipfile.lock

models/incremental/dim_dbt__models.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ fields as (
2525
command_invocation_id,
2626
artifact_generated_at,
2727
node_id,
28-
name,
28+
model_database,
2929
model_schema,
30+
name,
3031
depends_on_nodes,
3132
package_name,
3233
model_path,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{{ config( materialized='incremental', unique_key='manifest_source_id' ) }}
2+
3+
with dbt_sources as (
4+
5+
select * from {{ ref('stg_dbt__sources') }}
6+
7+
),
8+
9+
dbt_sources_incremental as (
10+
11+
select *
12+
from dbt_sources
13+
14+
{% if is_incremental() %}
15+
-- this filter will only be applied on an incremental run
16+
where artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
17+
{% endif %}
18+
19+
),
20+
21+
fields as (
22+
23+
select
24+
manifest_source_id,
25+
command_invocation_id,
26+
artifact_generated_at,
27+
node_id,
28+
name,
29+
source_name,
30+
source_schema,
31+
package_name,
32+
relation_name,
33+
source_path
34+
from dbt_sources_incremental
35+
36+
)
37+
38+
select * from fields

models/schemas.yml

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -104,28 +104,54 @@ models:
104104
- name: env_*
105105
description: Columns for the environment variables set when the command was executed.
106106

107-
- name: dim_dbt__models
108-
description: All dbt model metadata from every manifest.json.
109-
columns:
110-
- name: manifest_model_id
111-
description: Primary key generated from the command_invocation_id and checksum.
112-
tests:
113-
- unique
114-
- not_null
115-
- name: command_invocation_id
116-
description: The id of the command which resulted in the source artifact's generation.
117-
- name: artifact_generated_at
118-
description: Timestamp of when the source artifact was generated.
119-
- name: node_id
120-
description: Unique id for the node, in the form of model.[package_name].[model_name]
121-
- name: name
122-
description: The model name.
123-
- name: model_schema
124-
- name: depends_on_nodes
125-
description: List of node ids the model depends on.
126-
- name: package_name
127-
- name: model_path
128-
description: Filepath of the model.
129-
- name: checksum
130-
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
131-
- name: model_materialization
107+
- name: dim_dbt__models
108+
description: All dbt model metadata from every manifest.json.
109+
columns:
110+
- name: manifest_model_id
111+
description: Primary key generated from the command_invocation_id and node_id.
112+
tests:
113+
- unique
114+
- not_null
115+
- name: command_invocation_id
116+
description: The id of the command which resulted in the source artifact's generation.
117+
- name: artifact_generated_at
118+
description: Timestamp of when the source artifact was generated.
119+
- name: node_id
120+
description: Unique id for the node, in the form of model.[package_name].[model_name]
121+
- name: name
122+
description: The model name.
123+
- name: model_schema
124+
- name: depends_on_nodes
125+
description: List of node ids the model depends on.
126+
- name: package_name
127+
- name: model_path
128+
description: Filepath of the model.
129+
- name: checksum
130+
description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
131+
- name: model_materialization
132+
133+
- name: dim_dbt__sources
134+
description: All dbt source metadata from every manifest.json.
135+
columns:
136+
- name: manifest_source_id
137+
description: Primary key generated from the command_invocation_id and node_id.
138+
tests:
139+
- unique
140+
- not_null
141+
- name: command_invocation_id
142+
description: The id of the command which resulted in the source artifact's generation.
143+
- name: artifact_generated_at
144+
description: Timestamp of when the source artifact was generated.
145+
- name: node_id
146+
description: Unique id for the node, in the form of source.[package_name].[source_name].[table_name]
147+
- name: name
148+
description: The source node name.
149+
- name: source_name
150+
description: The name of the source.
151+
- name: source_schema
152+
- name: package_name
153+
description: Package source is defined in.
154+
- name: relation_name
155+
description: Name of the database entity this source resolved to.
156+
- name: source_path
157+
description: Filepath of the source.

models/staging/stg_dbt__artifacts.sql

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,38 @@ with base as (
88
fields as (
99

1010
select
11-
data,
11+
data:metadata:invocation_id::string as command_invocation_id,
1212
generated_at,
1313
path,
14-
artifact_type
14+
artifact_type,
15+
data
1516
from base
1617

18+
),
19+
20+
deduped as (
21+
22+
-- keep only the most recently generated row per invocation and artifact type
select
23+
*,
24+
row_number() over (
25+
partition by command_invocation_id, artifact_type
26+
order by generated_at desc
27+
) as index
28+
from fields
29+
qualify index = 1
30+
31+
),
32+
33+
artifacts as (
34+
35+
select
36+
command_invocation_id,
37+
generated_at,
38+
path,
39+
artifact_type,
40+
data
41+
from deduped
42+
1743
)
1844

19-
select * from fields
45+
select * from artifacts

models/staging/stg_dbt__models.sql

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ manifests as (
1616
flatten as (
1717

1818
select
19-
data:metadata:invocation_id::string as command_invocation_id,
19+
command_invocation_id,
2020
generated_at as artifact_generated_at,
2121
node.key as node_id,
22-
node.value:name::string as name,
22+
node.value:database::string as model_database,
2323
node.value:schema::string as model_schema,
24+
node.value:name::string as name,
2425
to_array(node.value:depends_on:nodes) as depends_on_nodes,
2526
node.value:package_name::string as package_name,
2627
node.value:path::string as model_path,
@@ -35,12 +36,13 @@ flatten as (
3536
surrogate_key as (
3637

3738
select
38-
{{ dbt_utils.surrogate_key(['command_invocation_id', 'checksum']) }} as manifest_model_id,
39+
{{ dbt_utils.surrogate_key(['command_invocation_id', 'node_id']) }} as manifest_model_id,
3940
command_invocation_id,
4041
artifact_generated_at,
4142
node_id,
42-
name,
43+
model_database,
4344
model_schema,
45+
name,
4446
depends_on_nodes,
4547
package_name,
4648
model_path,

models/staging/stg_dbt__sources.sql

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
with base as (
2+
3+
select *
4+
from {{ ref('stg_dbt__artifacts') }}
5+
6+
),
7+
8+
manifests as (
9+
10+
select *
11+
from base
12+
where artifact_type = 'manifest.json'
13+
14+
),
15+
16+
flatten as (
17+
18+
select
19+
command_invocation_id,
20+
generated_at as artifact_generated_at,
21+
node.key as node_id,
22+
node.value:name::string as name,
23+
node.value:source_name::string as source_name,
24+
node.value:schema::string as source_schema,
25+
node.value:package_name::string as package_name,
26+
node.value:relation_name::string as relation_name,
27+
node.value:path::string as source_path
28+
from manifests,
29+
lateral flatten(input => data:sources) as node
30+
where node.value:resource_type = 'source'
31+
32+
),
33+
34+
surrogate_key as (
35+
36+
select
37+
{{ dbt_utils.surrogate_key(['command_invocation_id', 'node_id']) }} as manifest_source_id,
38+
command_invocation_id,
39+
artifact_generated_at,
40+
node_id,
41+
name,
42+
source_name,
43+
source_schema,
44+
package_name,
45+
relation_name,
46+
source_path
47+
from flatten
48+
49+
)
50+
51+
select * from surrogate_key

0 commit comments

Comments
 (0)