forked from Velir/dbt-ga4
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
21e8ca3
commit 555c137
Showing
16 changed files
with
294 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Auto detect text files and perform LF normalization | ||
* text=auto |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
|
||
target/ | ||
dbt_packages/ | ||
logs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,29 @@ | ||
# dbt-ga4 | ||
DBT guide to package creation: https://docs.getdbt.com/docs/guides/building-packages | ||
DBT project structure notes: https://discourse.getdbt.com/t/how-we-structure-our-dbt-projects/355 | ||
|
||
To connect to BigQuery using OAuth, see instructions here: https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#local-oauth-gcloud-setup | ||
|
||
# Configuration Instructions | ||
|
||
Create the following variables scoped to the ga4 package in your dbt_project.yml | ||
- project (defaults to "bigquery-public-data") | ||
- dataset (defaults to "ga4_obfuscated_sample_ecommerce") | ||
|
||
# TODO | ||
|
||
- Macro to extract hostname from URL | ||
- Create staging tables for the following events: | ||
- scroll | ||
- first_visit | ||
- view_promotion | ||
- click | ||
- add_to_cart | ||
- purchase | ||
- Full event reference: https://developers.google.com/analytics/devguides/collection/ga4/reference/events | ||
|
||
- Create stg_sessions model | ||
- Create stg_users model | ||
|
||
- Recreate Fivetran ga3 models with ga4 data | ||
|
||
- Convert basic unnesting operations into macros |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
name: 'ga4' | ||
version: '1.0.0' | ||
config-version: 2 | ||
profile: 'ga4' | ||
model-paths: ["models"] | ||
analysis-paths: ["analyses"] | ||
test-paths: ["tests"] | ||
seed-paths: ["seeds"] | ||
macro-paths: ["macros"] | ||
snapshot-paths: ["snapshots"] | ||
|
||
target-path: "target" # directory which will store compiled SQL files | ||
clean-targets: # directories to be removed by `dbt clean` | ||
- "target" | ||
- "dbt_packages" | ||
|
||
vars: | ||
start_date: "20201230" # Defines the earliest GA4 _TABLE_SUFFIX to load into base events model. 20201230 produces about 1GB of data scanned. | ||
project: "bigquery-public-data" | ||
dataset: "ga4_obfuscated_sample_ecommerce" | ||
|
||
# Configuring models | ||
# Full documentation: https://docs.getdbt.com/docs/configuring-models | ||
|
||
# In this example config, we tell dbt to build all models in the example/ directory | ||
# as tables. These settings can be overridden in the individual model files | ||
# using the `{{ config(...) }}` macro. | ||
models: | ||
ga4: | ||
+materialized: view |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
--BigQuery does not cache wildcard queries that scan across sharded tables which means it's best to materialize the raw event data as a partitioned table so that future queries benefit from caching | ||
{{ | ||
config( | ||
materialized = 'incremental', | ||
incremental_strategy = 'insert_overwrite', | ||
partition_by={ | ||
"field": "event_date_dt", | ||
"data_type": "date", | ||
} | ||
) | ||
}} | ||
|
||
with source as ( | ||
select * | ||
from {{ source('ga4', 'events') }} | ||
where cast(_table_suffix as int64) >= {{var('start_date')}} | ||
{% if is_incremental() %} | ||
-- Incrementally add new events. Filters on _TABLE_SUFFIX using the max event_date_dt value found | ||
-- See https://docs.getdbt.com/reference/resource-configs/bigquery-configs#the-insert_overwrite-strategy | ||
and parse_date('%Y%m%d',_TABLE_SUFFIX) >= _dbt_max_partition | ||
{% endif %} | ||
), | ||
renamed as ( | ||
select | ||
parse_date('%Y%m%d',event_date) as event_date_dt, | ||
* | ||
EXCEPT (event_date) -- remove event date to ensure usage of event_date_dt which is partitioned | ||
from source | ||
) | ||
|
||
select * from renamed |
37 changes: 37 additions & 0 deletions
37
models/staging/ga4/base/base_ga4__events_static_partition.example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
-- If jobs are running daily, it may make sense to avoid the scanning necessary to determine the max partition date. Instead, a static incremental range can be set and this data will be overwritten/inserted at every incremental run. | ||
|
||
{% set partitions_to_replace = [ | ||
'current_date()', | ||
'date_sub(current_date(), interval 1 day)' | ||
] %} | ||
|
||
--BigQuery does not cache wildcard queries that scan across sharded tables which means it's best to materialize the raw event data as a partitioned table so that future queries benefit from caching | ||
{{ | ||
config( | ||
materialized = 'incremental', | ||
incremental_strategy = 'insert_overwrite', | ||
partition_by={ | ||
"field": "event_date_dt", | ||
"data_type": "date", | ||
} | ||
) | ||
}} | ||
|
||
with source as ( | ||
select * | ||
from {{ source('ga4', 'events') }} | ||
where cast(_table_suffix as int64) >= {{var('start_date')}} | ||
{% if is_incremental() %} | ||
-- recalculate yesterday + today | ||
and parse_date('%Y%m%d',_TABLE_SUFFIX) in ({{ partitions_to_replace | join(',') }}) | ||
{% endif %} | ||
), | ||
renamed as ( | ||
select | ||
parse_date('%Y%m%d',event_date) as event_date_dt, | ||
* | ||
EXCEPT (event_date) -- remove event date to ensure usage of event_date_dt which is partitioned | ||
from source | ||
) | ||
|
||
select * from renamed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
with page_view_with_params as ( | ||
select | ||
event_date_dt, | ||
user_id, | ||
user_pseudo_id, | ||
event_timestamp, | ||
event_name, params, | ||
traffic_source | ||
FROM {{ref('base_ga4__events')}}, | ||
UNNEST(event_params) as params | ||
where event_name = 'page_view' | ||
and (params.key = 'page_location' or | ||
params.key = 'ga_session_id' or | ||
params.key = 'ga_session_number' or | ||
params.key = 'entrances' or | ||
params.key = 'page_title' or | ||
params.key = 'page_referrer') | ||
), | ||
pivoted as ( | ||
select | ||
event_date_dt, | ||
user_pseudo_id, | ||
user_id, | ||
event_timestamp, | ||
event_name, | ||
traffic_source.name as traffic_source_name, | ||
traffic_source.source as traffic_source_source, | ||
traffic_source.medium as traffic_source_medium, | ||
MAX(if(params.key = "page_location", params.value.string_value, NULL)) as page_location, | ||
MAX(if(params.key = "ga_session_id", params.value.int_value, NULL)) as ga_session_id, | ||
MAX(if(params.key = "ga_session_number", params.value.int_value, NULL)) as ga_session_number, | ||
MAX(if(params.key = "entrances", params.value.int_value, 0)) as entrances, | ||
MAX(if(params.key = "page_title", params.value.string_value, NULL)) as page_title, | ||
MAX(if(params.key = "page_referrer", params.value.string_value, NULL)) as page_referrer | ||
|
||
from page_view_with_params | ||
group by 1,2,3,4,5,6,7,8 | ||
) | ||
|
||
select | ||
*, | ||
case | ||
when ga_session_number = 1 then TRUE | ||
else FALSE | ||
end as is_new_user | ||
from pivoted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
with purchase_with_params as ( | ||
select | ||
event_date_dt, | ||
user_id, | ||
user_pseudo_id, | ||
event_timestamp, | ||
event_name, params, | ||
traffic_source | ||
FROM {{ref('base_ga4__events')}}, | ||
UNNEST(event_params) as params | ||
where event_name = 'purchase' -- Pull only 'purchase' events | ||
and | ||
( | ||
params.key = 'page_location' or | ||
params.key = 'ga_session_id' or | ||
params.key = 'ga_session_number' or | ||
params.key = 'page_referrer' or | ||
params.key = 'currency' or | ||
params.key = 'value' or | ||
params.key = 'payment_type' or | ||
params.key = 'coupon' or | ||
params.key = 'transaction_id' | ||
) | ||
), | ||
pivoted as ( | ||
select | ||
event_date_dt, | ||
user_pseudo_id, | ||
user_id, | ||
event_timestamp, | ||
event_name, | ||
traffic_source.name as traffic_source_name, | ||
traffic_source.source as traffic_source_source, | ||
traffic_source.medium as traffic_source_medium, | ||
MAX(if(params.key = "page_location", params.value.string_value, NULL)) as page_location, | ||
MAX(if(params.key = "ga_session_id", params.value.int_value, NULL)) as ga_session_id, | ||
MAX(if(params.key = "ga_session_number", params.value.int_value, NULL)) as ga_session_number, | ||
MAX(if(params.key = "page_referrer", params.value.string_value, NULL)) as page_referrer, | ||
MAX(if(params.key = "coupon", params.value.string_value, NULL)) as coupon, | ||
MAX(if(params.key = "transaction_id", params.value.string_value, NULL)) as transaction_id, | ||
MAX(if(params.key = "currency", params.value.string_value, NULL)) as currency, | ||
MAX(if(params.key = "payment_type", params.value.string_value, NULL)) as payment_type, | ||
MAX(if(params.key = "value", params.value.float_value, NULL)) as value | ||
-- TODO how to handle items array? | ||
|
||
from purchase_with_params | ||
group by 1,2,3,4,5,6,7,8 | ||
) | ||
|
||
select | ||
*, | ||
case | ||
when ga_session_number = 1 then TRUE | ||
else FALSE | ||
end as is_new_user | ||
from pivoted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
with session_start_with_params as ( | ||
select | ||
event_date_dt, | ||
user_pseudo_id, | ||
user_id, | ||
event_timestamp, | ||
event_name, | ||
params, | ||
traffic_source | ||
FROM {{ref('base_ga4__events')}}, | ||
UNNEST(event_params) as params | ||
where event_name = 'session_start' | ||
and (params.key = 'page_location' or params.key = 'ga_session_id' or params.key = 'ga_session_number') | ||
), | ||
pivoted as ( | ||
select | ||
event_date_dt, | ||
user_pseudo_id, | ||
user_id, | ||
event_timestamp, | ||
event_name, | ||
traffic_source.name as traffic_source_name, | ||
traffic_source.source as traffic_source_source, | ||
traffic_source.medium as traffic_source_medium, | ||
MAX(if(params.key = "page_location", params.value.string_value, NULL)) as page_location, | ||
MAX(if(params.key = "ga_session_id", params.value.int_value, NULL)) as ga_session_id, | ||
MAX(if(params.key = "ga_session_number", params.value.int_value, NULL)) as ga_session_number, | ||
from session_start_with_params | ||
group by 1,2,3,4,5,6,7,8 | ||
) | ||
|
||
select | ||
*, | ||
case | ||
when ga_session_number = 1 then TRUE | ||
else FALSE | ||
end as is_new_user | ||
from pivoted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
version: 2 | ||
|
||
sources: | ||
- name: ga4 | ||
database: "{{var('project','bigquery-public-data')}}" #Default to bigquery-public-data | ||
schema: "{{var('dataset', 'ga4_obfuscated_sample_ecommerce')}}" #Default to ga4_obfuscated_sample_ecommerce | ||
tables: | ||
- name: events | ||
identifier: events_* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
version: 2 | ||
|
||
models: | ||
- name: base_ga4__events | ||
description: Base events model that pulls all fields from raw data. Resulting table is partitioned on event_date and is useful in that BQ queries can be cached against this table, but not against wildcard searches from the original tables which are sharded on date. | ||
- name: stg_ga4__page_view | ||
description: GA4 events filtered to only show 'page_view' events. Pivots common event parameters into separate columns. | ||
- name: stg_ga4__session_start | ||
description: GA4 events filtered to only show 'session_start' events. Pivots common event parameters into separate columns. | ||
- name: stg_ga4__purchase | ||
description: GA4 events filtered to only show 'purchase' events. Pivots common event parameters into separate columns. |
Empty file.
Empty file.
Empty file.