Merge branch 'main' of github.com:infinitelambda/dq-tools into main

infinitelambda · Sep 1, 2023 · 7065e02 · 7065e02
2 parents 64e4d73 + bf3b415
commit 7065e02
Show file tree

Hide file tree

Showing 15 changed files with 181 additions and 100 deletions.
diff --git a/.github/workflows/ci-pr.yml b/.github/workflows/ci-pr.yml
@@ -11,12 +11,16 @@ jobs:
       max-parallel: 1
       matrix:
         adapter: ["snowflake"]
-        version: ["1.3.0", "1.4.1", "1.5.2"]
+        version: ["1.6.1"]
     container:
       image: "ghcr.io/dbt-labs/dbt-${{ matrix.adapter }}:${{ matrix.version }}"
     steps:
       - uses: actions/checkout@v3
 
+      - name: Install additional deps
+        run: |
+          pip install "dbt-metricflow[${{ matrix.adapter }}]"
+
       - name: Run integration test
         run: |
           chmod +x run_test.sh
@@ -38,12 +42,16 @@ jobs:
       max-parallel: 1
       matrix:
         adapter: ["bigquery"]
-        version: ["1.3.0", "1.4.1", "1.5.3"]
+        version: ["1.6.3"]
     container:
       image: "ghcr.io/dbt-labs/dbt-${{ matrix.adapter }}:${{ matrix.version }}"
     steps:
       - uses: actions/checkout@v3
 
+      - name: Install additional deps
+        run: |
+          pip install "dbt-metricflow[${{ matrix.adapter }}]"
+
       - name: Set up credentials
         run: |
           echo $DBT_ENV_SECRET_BIGQUERY_SERVICE_KEY_JSON | base64 -d > /opt/bigquery_service_key.json

diff --git a/README.md b/README.md
@@ -21,7 +21,7 @@ The purpose of the dq tool is to make simple storing test results and visualisat
   ```yml
   packages:
     - package: infinitelambda/dq_tools
-      version: [">=1.3.0", "<1.4.0"]
+      version: [">=1.2.0", "<1.3.0"]
   ```
 - Configure schema in `dbt_project.yml` file:
 
@@ -124,13 +124,21 @@ Go to [dbt Hub](https://hub.getdbt.com/infinitelambda/dq_tools/latest/) and regi
 
 ### 1. Create table DQ_ISSUE_LOG in the database
 
-A macro `create_table_dq_issue_log` ([source](https://github.com/infinitelambda/dq-tools/blob/main/macros/create_table_dq_issue_log.sql)) will create the log table in your database (Snowflake) / project (BigQuery).
+Since version 1.3, the table `dq_issue_log` is built as a dbt model, so no manual hook configuration is needed :tada:.
 
-Add `on-run-start` hook (required dbt >= 1.0.0):
-```yml
-on-run-start:
-  - '{{ dq_tools.create_table_dq_issue_log() }}'
-```
+It should be created automatically as part of your upstream dbt command. If it is not, all you need to do is run the command: `dbt run -s dq_tools`.
+
+<details>
+  <summary>For dq-tools legacy version >=1.0,<1.3</summary>
+
+  A macro `create_table_dq_issue_log` ([source](https://github.com/infinitelambda/dq-tools/blob/main/macros/create_table_dq_issue_log.sql)) will create the log table in your database (Snowflake) / project (BigQuery).
+
+  Add `on-run-start` hook (required dbt >= 1.0.0):
+  ```yml
+  on-run-start:
+    - '{{ dq_tools.create_table_dq_issue_log() }}'
+  ```
+</details>
 
 <details>
   <summary>For dq-tools legacy version < 1.0, you can run it as an operation</summary>
@@ -147,18 +155,16 @@ Value for variable `dbt_dq_tool_schema: your_schema_name` needs to be added to d
 e.g.
 ```yaml
 vars:
-  # to create db table in the schema named as AUDIT
+  # (optional) to create db table in the schema named as AUDIT, default to `target.schema`
   dbt_dq_tool_schema: AUDIT
-  # (optional) to create db table in the database named as DQ_TOOLS
+  # (optional) to create db table in the database named as DQ_TOOLS, default to `target.database`
   dbt_dq_tool_database: DQ_TOOLS
 ```
 
 ### 3. Decide to save test result to Data Warehouse table:
 
 #### With `dq_tools_enable_store_test_results` variable:
 
-  NOTE: This variable only works when `dbt_test_results_to_db = False` (default, for backward compatibility purpose) with the newest version of dq-tools.
-
   Add the `on-run-end` hook to your project:
   ```yaml
   on-run-end:
@@ -233,17 +239,25 @@ vars:
 </details>
 
 ### 4. Decide to enable building the downstream models of the log table:
-Enable it in `dbt_project.yml` file:
-```yml
-# dbt_project.yml
-models:
-  dq_tools:
-    +enabled: true
 
-metrics:
-  dq_tools:
-    +enabled: true
-```
+Since version 1.4, all models and metrics are enabled by default.
+
+  <details>
+    <summary>For dq-tools version <1.4</summary>
+
+    Enable it in `dbt_project.yml` file:
+
+    ```yml
+    # dbt_project.yml
+    models:
+      dq_tools:
+        +enabled: true
+
+    metrics:
+      dq_tools:
+        +enabled: true
+    ```
+  </details>
 
 ## Macros
 

diff --git a/dbt_project.yml b/dbt_project.yml
@@ -1,7 +1,7 @@
 name: 'dq_tools'
 config-version: 2
 version: '1.2.0'
-require-dbt-version: ">=1.3.0"
+require-dbt-version: ">=1.6.0"
 
 test-paths: ["tests"]
 target-path: "target"
@@ -10,21 +10,16 @@ macro-paths: ["macros"]
 log-path: "logs"
 model-paths: ["models"]
 
-models:
-  dq_tools:
-    01_lake:
-      +enabled: true
-    02_staging:
-      +enabled: false
-    03_mart:
-      +enabled: false
-
 vars:
   # dq_tools_enable_store_test_results: true
   # dbt_dq_tool_schema: dq_raw
   # dbt_dq_tool_database: dq_tools
   # dbt_dq_tool_full_refresh: false
+  # >> for test coverage
   # dbt_dq_tool_test_coverage_exclusion:
   #   by_database_fqn: []
   #   by_schema_fqn: []
-  #   by_table_fqn: []
+  #   by_table_fqn: []
+  # >> for metricflow
+  # dbt_dq_tool_start_date: "to_date('01/01/2000','mm/dd/yyyy')"
+  # dbt_dq_tool_end_date: "to_date('01/01/2030','mm/dd/yyyy')"
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
@@ -51,9 +51,4 @@ seeds:
 models:
   dq_tools:
     03_mart:
-      +enabled: true
-      +schema: dq_tools_mart
-
-metrics:
-  dq_tools:
-    +enabled: true
+      +schema: dq_tools_mart
diff --git a/integration_tests/packages_1.6.yml b/integration_tests/packages_1.6.yml
@@ -0,0 +1,4 @@
+packages:
+  - local: ../  # the package in the parent directory (dq_tools itself)
+  - package: dbt-labs/dbt_utils  # provides dbt_utils.date_spine, used by metricflow_time_spine.sql
+    version: [">=1.0.0", "<2.0.0"]
diff --git a/models/04_metric/data_quality_score.yml b/models/04_metric/data_quality_score.yml
diff --git a/models/04_metric/metric__data_quality_score.yml b/models/04_metric/metric__data_quality_score.yml
@@ -0,0 +1,7 @@
+metrics:
+  - name: data_quality_score
+    description: Data Quality Score based on the Testing Result which is calculated by looking at Row Passed/Row Processed.
+    type: simple
+    type_params:
+      measure: data_quality_score  # measure declared in the data_quality_score semantic model
+    label: Data Quality Score
diff --git a/models/04_metric/metric__test_coverage.yml b/models/04_metric/metric__test_coverage.yml
@@ -0,0 +1,7 @@
+metrics:
+  - name: test_coverage
+    description: Percentage of test coverage by each dbt invocation
+    type: simple
+    type_params:
+      measure: coverage_pct  # measure declared in the test_coverage semantic model
+    label: Test Coverage
diff --git a/models/04_metric/metric__test_to_column_ratio.yml b/models/04_metric/metric__test_to_column_ratio.yml
@@ -0,0 +1,7 @@
+metrics:
+  - name: test_to_column_ratio
+    description: Ratio between the number of tests vs the number of columns
+    type: simple
+    type_params:
+      measure: test_to_column_ratio  # measure declared in the test_coverage semantic model
+    label: Test to Column Ratio
diff --git a/models/04_metric/metricflow_time_spine.sql b/models/04_metric/metricflow_time_spine.sql
@@ -0,0 +1,30 @@
+{{
+  config(
+    materialized = 'table',
+    tags = ['semantic', 'metricflow']
+  )
+}}
+
+-- MetricFlow time spine: a table exposing a single `date_day` column, built with
+-- dbt_utils.date_spine at 'day' granularity between start_date and end_date.
+-- Check https://docs.getdbt.com/docs/build/metricflow-time-spine
+{# Bounds come from the vars dbt_dq_tool_start_date / dbt_dq_tool_end_date. The defaults are SQL #}
+{# expressions (not quoted literals), so any override is presumably expected to be one too.      #}
+{% set start_date = var("dbt_dq_tool_start_date", "to_date('01/01/2000','mm/dd/yyyy')") -%}
+{% set end_date = var("dbt_dq_tool_end_date", "to_date('01/01/2030','mm/dd/yyyy')") -%}
+
+{%- if target.type == "bigquery" %}
+
+  {# On BigQuery the same vars are re-read, with the defaults written as DATE(y, m, d) instead. #}
+  {% set start_date = var("dbt_dq_tool_start_date", "DATE(2000,01,01)") %}
+  {% set end_date = var("dbt_dq_tool_end_date", "DATE(2030,01,01)") %}
+
+{%- endif %}
+
+with days as (
+
+  -- daily rows produced by the dbt_utils.date_spine macro
+  {{ dbt_utils.date_spine('day', start_date, end_date) }}
+
+),
+
+final as (
+    -- expose the spine's date_day column cast to DATE
+    select cast(date_day as date) as date_day
+    from days
+)
+
+select * from final
diff --git a/models/04_metric/sm__data_quality_score.yml b/models/04_metric/sm__data_quality_score.yml
@@ -0,0 +1,33 @@
+semantic_models:
+  - name: data_quality_score
+    model: ref('bi_dq_metrics')
+    defaults:
+      agg_time_dimension: run_time  # the time dimension declared under `dimensions`
+    entities:
+      - name: key  # primary entity: concatenation of the columns listed in expr
+        type: primary
+        expr: >
+          concat(
+            run_time,
+            rule_name,
+            data_concept,
+            data_element,
+            indicator_category
+          )
+    measures:
+      - name: data_quality_score  # (processed - failed) / processed, averaged
+        agg: average
+        expr: (rows_processed - rows_failed) * 1.00 / nullif(rows_processed, 0)  # NULL, not an error, on 0 rows
+    dimensions:
+      - name: run_time
+        type: time
+        type_params:
+          time_granularity: day
+      - name: rule_name
+        type: categorical
+      - name: data_concept
+        type: categorical
+      - name: data_element
+        type: categorical
+      - name: dq_dimension
+        type: categorical
diff --git a/models/04_metric/sm__test_coverage.yml b/models/04_metric/sm__test_coverage.yml
@@ -0,0 +1,25 @@
+semantic_models:
+  - name: test_coverage
+    model: ref('test_coverage')
+    defaults:
+      agg_time_dimension: check_timestamp  # the time dimension declared under `dimensions`
+    entities:
+      - name: key  # primary entity: concatenation of check_timestamp and invocation_id
+        type: primary
+        expr: > 
+          concat(
+            check_timestamp,
+            invocation_id
+          )
+    measures:
+      # No `expr` is given for these measures; presumably each reads the column of the same name (confirm).
+      - name: coverage_pct  # referenced by the test_coverage metric
+        agg: average
+      - name: test_to_column_ratio  # referenced by the test_to_column_ratio metric
+        agg: average
+    dimensions:
+      - name: check_timestamp
+        type: time
+        type_params:
+          time_granularity: day
+      - name: invocation_id
+        type: categorical
diff --git a/models/04_metric/test_coverage.yml b/models/04_metric/test_coverage.yml
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,13 +6,15 @@ authors = ["IL <dat@infinitelambda.com>"]
 
 [tool.poetry.dependencies]
 python = "^3.9"
-dbt-core = "~1.4.0"
-dbt-snowflake = "~1.4.0"
-# dbt-bigquery = "~1.4.0"
-pre-commit = "^2.17.0"
+dbt-core = "~1.6.0"
+dbt-snowflake = "~1.6.0"
+dbt-metricflow = {extras = ["snowflake"], version = "~1.6.0"}
+# dbt-bigquery = "~1.6.0"
+# dbt-metricflow = {extras = ["bigquery"], version = "~1.6.0"}
 
 [tool.poetry.dev-dependencies]
 poethepoet = "^0.12.3"
+pre-commit = "^2.17.0"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]