Skip to content

Commit d3271c8

Browse files
authored
Merge pull request #364 from the4thamigo-uk/multiple-materialized-views_to_same_target
Allow multiple materialized views to write to same target (#280)
2 parents bd45c3c + 0b54910 commit d3271c8

File tree

4 files changed

+323
-24
lines changed

4 files changed

+323
-24
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
* Added support for [range_hashed](https://clickhouse.com/docs/en/sql-reference/dictionaries#range_hashed) and [complex_key_range_hashed](https://clickhouse.com/docs/en/sql-reference/dictionaries#complex_key_range_hashed) layouts to the dictionary materialization. ([#361](https://github.com/ClickHouse/dbt-clickhouse/pull/361))
44
* Truncated stack trace for database errors for cleaner output when HIDE_STACK_TRACE variable is set to any value.
55

6+
### New Features
7+
* Added support for the creation of more than one materialized view inserting records into the same target table. ([#364](https://github.com/ClickHouse/dbt-clickhouse/pull/364))
8+
69
### Release [1.8.4], 2024-09-17
710
### Improvement
811
* The S3 help macro now supports a `role_arn` parameter as an alternative way to provide authentication for S3 based models. Thanks to

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,27 @@ no corresponding REFRESH operation). Instead, it acts as an "insert trigger", a
246246
(https://github.com/ClickHouse/dbt-clickhouse/blob/main/tests/integration/adapter/materialized_view/test_materialized_view.py) for an introductory example
247247
of how to use this functionality.
248248

249+
ClickHouse provides the ability for more than one materialized view to write records to the same target table. To support this in dbt-clickhouse, you can construct a `UNION` in your model file, such that the SQL for each of your materialized views is wrapped with comments of the form `--my_mv_name:begin` and `--my_mv_name:end`.
250+
251+
For example, the following will build two materialized views, both writing data to the same destination table of the model. The names of the materialized views will take the form `<model_name>_mv1` and `<model_name>_mv2`:
252+
253+
```
254+
--mv1:begin
255+
select a,b,c from {{ source('raw', 'table_1') }}
256+
--mv1:end
257+
union all
258+
--mv2:begin
259+
select a,b,c from {{ source('raw', 'table_2') }}
260+
--mv2:end
261+
```
262+
263+
> IMPORTANT!
264+
>
265+
> When updating a model with multiple materialized views (MVs), especially when renaming one of the MV names, dbt-clickhouse does not automatically drop the old MV. Instead,
266+
> you will encounter the following warning: `Warning - Table <previous table name> was detected with the same pattern as model name <your model name> but was not found in this run. In case it is a renamed mv that was previously part of this model, drop it manually (!!!) `
267+
268+
269+
249270
# Dictionary materializations (experimental)
250271
See the tests in https://github.com/ClickHouse/dbt-clickhouse/blob/main/tests/integration/adapter/dictionary/test_dictionary.py for examples of how to
251272
implement materializations for ClickHouse dictionaries

dbt/include/clickhouse/macros/materializations/materialized_view.sql

Lines changed: 83 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
{%- materialization materialized_view, adapter='clickhouse' -%}
77

88
{%- set target_relation = this.incorporate(type='table') -%}
9-
{%- set mv_relation = target_relation.derivative('_mv', 'materialized_view') -%}
109
{%- set cluster_clause = on_cluster_clause(target_relation) -%}
1110

1211
{# look for an existing relation for the target table and create backup relations if necessary #}
@@ -35,16 +34,57 @@
3534
-- `BEGIN` happens here:
3635
{{ run_hooks(pre_hooks, inside_transaction=True) }}
3736

37+
-- extract the names of the materialized views from the sql
38+
{% set view_names = modules.re.findall('--([^:]+):begin', sql) %}
39+
40+
-- extract the sql for each of the materialized views into a map
41+
{% set views = {} %}
42+
{% if view_names %}
43+
{% for view_name in view_names %}
44+
{% set view_sql = modules.re.findall('--' + view_name + ':begin(.*)--' + view_name + ':end', sql, flags=modules.re.DOTALL)[0] %}
45+
{%- set _ = views.update({view_name: view_sql}) -%}
46+
{% endfor %}
47+
{% else %}
48+
{%- set _ = views.update({"mv": sql}) -%}
49+
{% endif %}
50+
3851
{% if backup_relation is none %}
3952
{{ log('Creating new materialized view ' + target_relation.name )}}
40-
{% call statement('main') -%}
41-
{{ clickhouse__get_create_materialized_view_as_sql(target_relation, sql) }}
42-
{%- endcall %}
53+
{{ clickhouse__get_create_materialized_view_as_sql(target_relation, sql, views) }}
4354
{% elif existing_relation.can_exchange %}
4455
{{ log('Replacing existing materialized view ' + target_relation.name) }}
45-
{% call statement('drop existing materialized view') %}
46-
drop view if exists {{ mv_relation }} {{ cluster_clause }}
47-
{% endcall %}
56+
-- in this section, we look for mvs that have the same pattern as this model, but for some reason,
57+
-- are not listed in the model. This might happen when using multiple mv, and renaming one of the mv in the model.
58+
-- If such an mv is found, we raise a warning to the user that they might need to drop the mv manually.
59+
{{ log('Searching for existing materialized views with the pattern of ' + target_relation.name) }}
60+
{{ log('Views dictionary contents: ' + views | string) }}
61+
62+
{% set tables_query %}
63+
select table_name
64+
from information_schema.tables
65+
where table_schema = '{{ existing_relation.schema }}'
66+
and table_name like '%{{ target_relation.name }}%'
67+
and table_type = 'VIEW'
68+
{% endset %}
69+
70+
{% set tables_result = run_query(tables_query) %}
71+
{% if tables_result is not none %}
72+
{% set tables = tables_result.columns[0].values() %}
73+
{{ log('Current mvs found in ClickHouse are: ' + tables | join(', ')) }}
74+
{% set mv_names = [] %}
75+
{% for key in views.keys() %}
76+
{% do mv_names.append(target_relation.name ~ "_" ~ key) %}
77+
{% endfor %}
78+
{{ log('Model mvs to replace ' + mv_names | string) }}
79+
{% for table in tables %}
80+
{% if table not in mv_names %}
81+
{{ log('Warning - Table "' + table + '" was detected with the same pattern as model name "' + target_relation.name + '" but was not found in this run. In case it is a renamed mv that was previously part of this model, drop it manually (!!!)') }}
82+
{% endif %}
83+
{% endfor %}
84+
{% else %}
85+
{{ log('No existing mvs found matching the pattern. continuing..', info=True) }}
86+
{% endif %}
87+
{{ clickhouse__drop_mvs(target_relation, cluster_clause, views) }}
4888
{% if should_full_refresh() %}
4989
{% call statement('main') -%}
5090
{{ get_create_table_as_sql(False, backup_relation, sql) }}
@@ -56,12 +96,10 @@
5696
select 1
5797
{%- endcall %}
5898
{% endif %}
59-
{% call statement('create new materialized view') %}
60-
{{ clickhouse__create_mv_sql(mv_relation, existing_relation, cluster_clause, sql) }}
61-
{% endcall %}
99+
{{ clickhouse__create_mvs(existing_relation, cluster_clause, views) }}
62100
{% else %}
63101
{{ log('Replacing existing materialized view ' + target_relation.name) }}
64-
{{ clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql) }}
102+
{{ clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql, views) }}
65103
{% endif %}
66104

67105
-- cleanup
@@ -78,7 +116,12 @@
78116

79117
{{ run_hooks(post_hooks, inside_transaction=False) }}
80118

81-
{{ return({'relations': [target_relation, mv_relation]}) }}
119+
{% set relations = [target_relation] %}
120+
{% for view in views %}
121+
{{ relations.append(target_relation.derivative('_' + view, 'materialized_view')) }}
122+
{% endfor %}
123+
124+
{{ return({'relations': relations}) }}
82125

83126
{%- endmaterialization -%}
84127

@@ -89,30 +132,47 @@
89132
2. Create a materialized view using the SQL in the model that inserts
90133
data into the table creating during step 1
91134
#}
92-
{% macro clickhouse__get_create_materialized_view_as_sql(relation, sql) -%}
93-
{% call statement('create_target_table') %}
135+
{% macro clickhouse__get_create_materialized_view_as_sql(relation, sql, views) -%}
136+
{% call statement('main') %}
94137
{{ get_create_table_as_sql(False, relation, sql) }}
95138
{% endcall %}
96139
{%- set cluster_clause = on_cluster_clause(relation) -%}
97140
{%- set mv_relation = relation.derivative('_mv', 'materialized_view') -%}
98-
{{ clickhouse__create_mv_sql(mv_relation, relation, cluster_clause, sql) }}
141+
{{ clickhouse__create_mvs(relation, cluster_clause, views) }}
99142
{%- endmacro %}
100143

144+
{#
  Render a `drop view if exists` statement for a single materialized view.
  `mv_relation` is the view to drop; `cluster_clause` is appended after the
  relation name exactly as passed in.
#}
{% macro clickhouse__drop_mv(mv_relation, cluster_clause) -%}
145+
drop view if exists {{ mv_relation }} {{ cluster_clause }}
146+
{%- endmacro %}
101147

102-
{% macro clickhouse__create_mv_sql(mv_relation, target_table, cluster_clause, sql) -%}
148+
{#
  Render a `create materialized view if not exists` statement for `mv_relation`.
  The view writes to `target_table` and is defined by the select in `sql`;
  `cluster_clause` is inserted right after the view name.
#}
{% macro clickhouse__create_mv(mv_relation, target_table, cluster_clause, sql) -%}
103149
create materialized view if not exists {{ mv_relation }} {{ cluster_clause }}
104150
to {{ target_table }}
105151
as {{ sql }}
106152
{%- endmacro %}
107153

154+
{#
  Drop every materialized view listed for this model.
  For each key in `views`, the view relation is derived from `target_relation`
  with the suffix `_<key>`, and one `drop view if exists` statement is executed
  for it via clickhouse__drop_mv, passing `cluster_clause` through unchanged.
#}
{% macro clickhouse__drop_mvs(target_relation, cluster_clause, views) -%}
155+
{% for view in views.keys() %}
156+
{%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
157+
{% call statement('drop existing mv: ' + view) -%}
158+
{{ clickhouse__drop_mv(mv_relation, cluster_clause) }};
159+
{% endcall %}
160+
{% endfor %}
161+
{%- endmacro %}
162+
163+
{#
  Create one materialized view per entry in `views`, iterated as
  (name, select SQL) pairs. Each view relation is derived from
  `target_relation` with the suffix `_<name>` and is created to write into
  `target_relation`, using a separate statement per view.
  NOTE(review): the statement label reads 'create existing mv: ...', which looks
  copied from the drop macro -- confirm whether 'create mv: ...' was intended.
#}
{% macro clickhouse__create_mvs(target_relation, cluster_clause, views) -%}
164+
{% for view, view_sql in views.items() %}
165+
{%- set mv_relation = target_relation.derivative('_' + view, 'materialized_view') -%}
166+
{% call statement('create existing mv: ' + view) -%}
167+
{{ clickhouse__create_mv(mv_relation, target_relation, cluster_clause, view_sql) }};
168+
{% endcall %}
169+
{% endfor %}
170+
{%- endmacro %}
108171

109-
{% macro clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql) %}
172+
{% macro clickhouse__replace_mv(target_relation, existing_relation, intermediate_relation, backup_relation, sql, views) %}
110173
{# drop existing materialized view while we recreate the target table #}
111174
{%- set cluster_clause = on_cluster_clause(target_relation) -%}
112-
{%- set mv_relation = target_relation.derivative('_mv', 'materialized_view') -%}
113-
{% call statement('drop existing mv') -%}
114-
drop view if exists {{ mv_relation }} {{ cluster_clause }}
115-
{%- endcall %}
175+
{{ clickhouse__drop_mvs(target_relation, cluster_clause, views) }}
116176

117177
{# recreate the target table #}
118178
{% call statement('main') -%}
@@ -122,5 +182,6 @@
122182
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
123183

124184
{# now that the target table is recreated, we can finally create our new view #}
125-
{{ clickhouse__create_mv_sql(mv_relation, target_relation, cluster_clause, sql) }}
185+
{{ clickhouse__create_mvs(target_relation, cluster_clause, views) }}
126186
{% endmacro %}
187+

0 commit comments

Comments
 (0)