Adds the current models model with recent run stats (#79)

alanmcruickshank · NiallRees · web-flow · commit bad83e80b3d1 · 2022-02-02T13:28:57.000Z
* Enforce consistency across materialised models.

* Adds the current models model with recent run stats

* linting

* linting

* Update models/schemas.yml

Co-authored-by: Niall Woodward &lt;niall@niallrees.com&gt;

* Add new model to Readme.md

Co-authored-by: Niall Woodward &lt;niall@niallrees.com&gt;
diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@ Models included:
 - `dim_dbt__seeds`
 - `dim_dbt__snapshots`
 - `dim_dbt__tests`
+- `dim_dbt__current_models`
 - `fct_dbt__critical_path`
 - `fct_dbt__latest_full_model_executions`
 - `fct_dbt__model_executions`
diff --git a/models/dim_dbt__current_models.sql b/models/dim_dbt__current_models.sql
@@ -0,0 +1,100 @@
+with run_results as (
+    -- Import CTEs: this model combines run results, models and model executions to describe the current models and their latest run stats.
+    select *
+    from {{ ref('fct_dbt__run_results') }}
+
+),
+
+models as (
+
+    select *
+    from {{ ref('dim_dbt__models') }}
+
+),
+
+model_executions as (
+
+    select *
+    from {{ ref('fct_dbt__model_executions') }}
+
+),
+
+-- Get the most recent invocation of the `run` command (by artifact_generated_at); it stands in for the latest compile of the project
+latest_compile as (
+
+    select
+        command_invocation_id,
+        dbt_cloud_run_id
+    from run_results
+    where execution_command = 'run'
+    order by artifact_generated_at desc
+    limit 1
+
+),
+
+-- Models present in the most recent compile run (latest_compile holds a single row, so this join only filters models)
+latest_models as (
+
+    select models.*
+    from models
+    -- In a local deploy, the command id is sufficient to match, but not in dbt Cloud - there the cloud run id is required to achieve a match.
+    inner join latest_compile
+        on models.command_invocation_id = latest_compile.command_invocation_id
+            or models.dbt_cloud_run_id = latest_compile.dbt_cloud_run_id
+
+),
+-- Successful executions of the current models, indexed from most recent (run_idx = 1) within each model and refresh type
+latest_model_runs as (
+
+    select
+        latest_models.node_id,
+        model_executions.query_completed_at,
+        model_executions.total_node_runtime,
+        model_executions.rows_affected,
+        model_executions.was_full_refresh,
+        -- Work out indices so we can get the most recent runs, both incremental and full (1 = most recent for that model and refresh type).
+        row_number() over (
+            partition by latest_models.node_id, model_executions.was_full_refresh
+            order by model_executions.query_completed_at desc
+        ) as run_idx
+    from latest_models
+    inner join model_executions
+        on latest_models.node_id = model_executions.node_id
+    -- Only successful runs
+    where model_executions.status = 'success'
+
+),
+-- Pivot the most recent incremental and full-refresh execution of each model into a single row per node_id
+latest_model_stats as (
+    select
+        node_id,
+        max(iff(not was_full_refresh, query_completed_at, null)) as last_incremental_run_completed_at,
+        max(iff(not was_full_refresh, total_node_runtime, null)) as last_incremental_run_total_runtime,
+        max(iff(not was_full_refresh, rows_affected, null)) as last_incremental_run_rows_affected,
+        max(iff(was_full_refresh, query_completed_at, null)) as last_full_run_completed_at,
+        max(iff(was_full_refresh, total_node_runtime, null)) as last_full_run_total_runtime,
+        max(iff(was_full_refresh, rows_affected, null)) as last_full_run_rows_affected
+    from latest_model_runs
+    -- Only most recent runs (of each type), so each max() above sees at most one non-null value per model
+    where run_idx = 1
+    group by node_id
+
+),
+-- Current models with their latest run stats; the left join keeps models with no successful executions (their stats are null)
+final as (
+
+    select
+        latest_models.*,
+        latest_model_stats.last_incremental_run_completed_at,
+        latest_model_stats.last_incremental_run_total_runtime,
+        latest_model_stats.last_incremental_run_rows_affected,
+        latest_model_stats.last_full_run_completed_at,
+        latest_model_stats.last_full_run_total_runtime,
+        latest_model_stats.last_full_run_rows_affected
+    from latest_models
+    left join latest_model_stats
+        on latest_models.node_id = latest_model_stats.node_id
+
+)
+
+select * from final
diff --git a/models/schemas.yml b/models/schemas.yml
@@ -215,3 +215,42 @@ models:
         description: Name of the database entity this source resolved to.
       - name: source_path
         description: Filepath of the source.
+
+  - name: dim_dbt__current_models
+    description: A subset of the models found in `dim_dbt__models`, which were present in the manifest of the most recent run. This
+      represents the models which are currently live in the dbt project.
+    columns:
+      - name: manifest_model_id
+        description: Primary key generated from the command_invocation_id and checksum.
+        tests:
+          - unique
+          - not_null
+      - name: command_invocation_id
+        description: The id of the command which resulted in the source artifact's generation.
+      - name: artifact_generated_at
+        description: Timestamp of when the source artifact was generated.
+      - name: node_id
+        description: Unique id for the node, in the form of model.[package_name].[model_name]
+      - name: name
+        description: The model name.
+      - name: model_schema
+      - name: depends_on_nodes
+        description: List of node ids the model depends on.
+      - name: package_name
+      - name: model_path
+        description: Filepath of the model.
+      - name: checksum
+        description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
+      - name: model_materialization
+      - name: last_incremental_run_completed_at
+        description: The completion time from the last time this model was run during an incremental run.
+      - name: last_incremental_run_total_runtime
+        description: The total runtime from the last time this model was run during an incremental run.
+      - name: last_incremental_run_rows_affected
+        description: The number of rows affected from the last time this model was run during an incremental run.
+      - name: last_full_run_completed_at
+        description: The completion time from the last time this model was run during a full refresh.
+      - name: last_full_run_total_runtime
+        description: The total runtime from the last time this model was run during a full refresh.
+      - name: last_full_run_rows_affected
+        description: The number of rows affected from the last time this model was run during a full refresh.