From 988642bb7c458123d8ea4565c929d4f09efec2a3 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 13 Jan 2026 17:38:03 +0000 Subject: [PATCH 01/10] First pass --- dbt/models/default/default.vw_pin_sale.sql | 43 +++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 14430d981..5f6e7a567 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -367,7 +367,48 @@ SELECT -- If there is no override, default to sv_is_outlier WHEN sales_val.sv_is_outlier IS NOT NULL THEN sales_val.sv_is_outlier - END AS is_outlier + END AS is_outlier, + -- Combined outlier reasons: model SV reasons + override “trigger” fieldnames +ARRAY_DISTINCT( + CONCAT( + -- Model reasons (sv_outlier_reason1-3) + FILTER( + ARRAY[ + sales_val.sv_outlier_reason1, + sales_val.sv_outlier_reason2, + sales_val.sv_outlier_reason3 + ], + r -> r IS NOT NULL AND TRIM(r) <> '' + ), + + -- Override triggers (only those that *would* make the sale an outlier) + FILTER( + ARRAY[ + CASE + WHEN COALESCE(flag_override.is_arms_length = FALSE, FALSE) + THEN 'is_arms_length' + END, + CASE + WHEN COALESCE(flag_override.is_flip = TRUE, FALSE) + THEN 'is_flip' + END, + CASE + WHEN COALESCE(flag_override.has_class_change = TRUE, FALSE) + THEN 'has_class_change' + END, + CASE + WHEN COALESCE(flag_override.has_characteristic_change = 'yes_major', FALSE) + THEN 'has_characteristic_change' + END, + CASE + WHEN COALESCE(flag_override.requires_field_check = TRUE, FALSE) + THEN 'requires_field_check' + END + ], + r -> r IS NOT NULL + ) + ) +) AS outlier_reason FROM unique_sales LEFT JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no From 13f850e2d0c2caa779d2e9dac816e8ff0d324360 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Tue, 13 Jan 2026 22:17:11 +0000 Subject: [PATCH 02/10] Edit comments --- dbt/models/default/default.vw_pin_sale.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 5f6e7a567..313ee6383 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -368,10 +368,10 @@ SELECT WHEN sales_val.sv_is_outlier IS NOT NULL THEN sales_val.sv_is_outlier END AS is_outlier, - -- Combined outlier reasons: model SV reasons + override “trigger” fieldnames + -- Combined outlier reasons: model SV reasons + manual override reasons ARRAY_DISTINCT( CONCAT( - -- Model reasons (sv_outlier_reason1-3) + -- Sales val statistical model reasons (sv_outlier_reason1-3) FILTER( ARRAY[ sales_val.sv_outlier_reason1, @@ -381,7 +381,7 @@ ARRAY_DISTINCT( r -> r IS NOT NULL AND TRIM(r) <> '' ), - -- Override triggers (only those that *would* make the sale an outlier) + -- Manual override triggers FILTER( ARRAY[ CASE From d66b74e1858be1d4c1d467711b9ca493e6e93bf7 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 14 Jan 2026 20:41:57 +0000 Subject: [PATCH 03/10] Add prefix and remove snake case --- dbt/models/default/default.vw_pin_sale.sql | 79 +++++++++++----------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 313ee6383..788799467 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -369,46 +369,49 @@ SELECT THEN sales_val.sv_is_outlier END AS is_outlier, -- Combined outlier reasons: model SV reasons + manual override reasons -ARRAY_DISTINCT( - CONCAT( - -- Sales val statistical model reasons (sv_outlier_reason1-3) - FILTER( - ARRAY[ - sales_val.sv_outlier_reason1, - sales_val.sv_outlier_reason2, - sales_val.sv_outlier_reason3 - ], - r -> r IS NOT NULL AND TRIM(r) <> '' - ), + ARRAY_DISTINCT( + CONCAT( + -- Sales val statistical model reasons (sv_outlier_reason1-3) + FILTER( + ARRAY[ + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason1), + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason2), + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason3) + ], + r -> r IS NOT NULL AND TRIM(r) <> 'SV pipeline:' + ), - -- Manual override triggers - FILTER( - ARRAY[ - CASE - WHEN COALESCE(flag_override.is_arms_length = FALSE, FALSE) - THEN 'is_arms_length' - END, - CASE - WHEN COALESCE(flag_override.is_flip = TRUE, FALSE) - THEN 'is_flip' - END, - CASE - WHEN COALESCE(flag_override.has_class_change = TRUE, FALSE) - THEN 'has_class_change' - END, - CASE - WHEN COALESCE(flag_override.has_characteristic_change = 'yes_major', FALSE) - THEN 'has_characteristic_change' - END, - CASE - WHEN COALESCE(flag_override.requires_field_check = TRUE, FALSE) - THEN 'requires_field_check' - END - ], - r -> r IS NOT NULL + -- Manual override triggers (human-readable) + FILTER( + ARRAY[ + IF( + COALESCE(flag_override.is_arms_length = FALSE, FALSE), + 'Analyst: Arms length' + ), + IF( + COALESCE(flag_override.is_flip = TRUE, FALSE), + 'Analyst: Flip' + ), + IF( + COALESCE(flag_override.has_class_change = TRUE, FALSE), + 'Analyst: Class change' + ), + IF( + COALESCE( + flag_override.has_characteristic_change = 'yes_major', + FALSE + ), + 'Analyst: Characteristic change' + ), + IF( + COALESCE(flag_override.requires_field_check = TRUE, FALSE), + 'Analyst: Requires field check' + ) + ], + r -> r IS NOT NULL + ) ) - ) -) AS outlier_reason + ) AS outlier_reason FROM unique_sales LEFT JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no From 2b1362b764963c6272a3fd57d8c5d0da9dcb9013 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 14 Jan 2026 20:54:16 +0000 Subject: [PATCH 04/10] Make arms length negative --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 788799467..0a0490cd9 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -386,7 +386,7 @@ SELECT ARRAY[ IF( COALESCE(flag_override.is_arms_length = FALSE, FALSE), - 'Analyst: Arms length' + 'Analyst: Non-arms length' ), IF( COALESCE(flag_override.is_flip = TRUE, FALSE), From aae4e6df732c6111eec6247a43199a812d504fc2 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 14 Jan 2026 21:16:20 +0000 Subject: [PATCH 05/10] Place analyst reasons before sv reasons --- dbt/models/default/default.vw_pin_sale.sql | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 0a0490cd9..7183c5d8b 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -368,20 +368,10 @@ SELECT WHEN sales_val.sv_is_outlier IS NOT NULL THEN sales_val.sv_is_outlier END AS is_outlier, - -- Combined outlier reasons: model SV reasons + manual override reasons + -- Combined outlier reasons: manual override reasons + model SV reasons ARRAY_DISTINCT( CONCAT( - -- Sales val statistical model reasons (sv_outlier_reason1-3) - FILTER( - ARRAY[ - CONCAT('SV pipeline: ', sales_val.sv_outlier_reason1), - CONCAT('SV pipeline: ', sales_val.sv_outlier_reason2), - CONCAT('SV pipeline: ', sales_val.sv_outlier_reason3) - ], - r -> r IS NOT NULL AND TRIM(r) <> 'SV pipeline:' - ), - - -- Manual override triggers (human-readable) + -- Manual override triggers (human-readable) FIRST FILTER( ARRAY[ IF( @@ -409,6 +399,16 @@ SELECT ) ], r -> r IS NOT NULL + ), + + -- Sales val statistical model reasons (sv_outlier_reason1-3) SECOND + FILTER( + ARRAY[ + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason1), + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason2), + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason3) + ], + r -> r IS NOT NULL AND TRIM(r) <> 'SV pipeline:' ) ) ) AS outlier_reason From db05fa877e70eacf99ac02c0c09871e8663042e1 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Wed, 14 Jan 2026 21:17:51 +0000 Subject: [PATCH 06/10] Comments and linting --- dbt/models/default/default.vw_pin_sale.sql | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 7183c5d8b..2dd169165 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -371,7 +371,7 @@ SELECT -- Combined outlier reasons: manual override reasons + model SV reasons ARRAY_DISTINCT( CONCAT( - -- Manual override triggers (human-readable) FIRST + -- Manual analyst override triggers FILTER( ARRAY[ IF( @@ -388,27 +388,30 @@ SELECT ), IF( COALESCE( - flag_override.has_characteristic_change = 'yes_major', + flag_override.has_characteristic_change + = 'yes_major', FALSE ), 'Analyst: Characteristic change' ), IF( - COALESCE(flag_override.requires_field_check = TRUE, FALSE), + COALESCE( + flag_override.requires_field_check = TRUE, FALSE + ), 'Analyst: Requires field check' ) ], r -> r IS NOT NULL ), - -- Sales val statistical model reasons (sv_outlier_reason1-3) SECOND + -- Sales val statistical model reasons (sv_outlier_reason1-3) FILTER( ARRAY[ CONCAT('SV pipeline: ', sales_val.sv_outlier_reason1), CONCAT('SV pipeline: ', sales_val.sv_outlier_reason2), CONCAT('SV pipeline: ', sales_val.sv_outlier_reason3) ], - r -> r IS NOT NULL AND TRIM(r) <> 'SV pipeline:' + r -> r IS NOT NULL AND TRIM(r) != 'SV pipeline:' ) ) ) AS outlier_reason From 9d16c8067e46248adcb1c07c51476011a3be5e07 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 15 Jan 2026 20:49:22 +0000 Subject: [PATCH 07/10] Add source_is_outlier --- dbt/models/default/default.vw_pin_sale.sql | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 2dd169165..01b457dc1 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -414,7 +414,22 @@ SELECT r -> r IS NOT NULL AND TRIM(r) != 'SV pipeline:' ) ) - ) AS outlier_reason + ) AS outlier_reason, + -- Logic that mirrots the is_outlier_field but lets us know explicity + -- if the is_outlier column determination is an analyst override or + -- an algorithmic fallback. + CASE + WHEN + flag_override.is_arms_length IS NOT NULL + OR flag_override.is_flip IS NOT NULL + OR flag_override.has_class_change IS NOT NULL + OR flag_override.has_characteristic_change IS NOT NULL + OR flag_override.requires_field_check IS NOT NULL + THEN 'analyst' + + WHEN sales_val.sv_is_outlier IS NOT NULL + THEN 'algorithm' + END AS source_is_outlier FROM unique_sales LEFT JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no From 408d6068557efa3cf32ca8a882c505ff7caf1f90 Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 15 Jan 2026 21:13:13 +0000 Subject: [PATCH 08/10] Add docs --- dbt/models/default/schema/default.vw_pin_sale.yml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/schema/default.vw_pin_sale.yml b/dbt/models/default/schema/default.vw_pin_sale.yml index 9ce4e3ffd..1c7fa6208 100644 --- a/dbt/models/default/schema/default.vw_pin_sale.yml +++ b/dbt/models/default/schema/default.vw_pin_sale.yml @@ -25,11 +25,11 @@ models: description: Indicator for whether or not the observation uses the MyDec sale date - name: is_outlier description: | - The final determination indicating whether a sale is a statistical + The final determination indicating whether a sale is an outlier that should be excluded from model training. - Combines information from our sales validation model with information - from analysts who review our outliers for correctness. + Combines information from our statistical sales validation model with + information from analysts who review our outliers for correctness. - name: mydec_deed_type description: Deed type from MyDec, more granular than CCAO deed type - name: mydec_line_8_current_use @@ -72,6 +72,10 @@ models: description: '{{ doc("shared_column_nbhd_code") }}' - name: num_parcels_sale description: '{{ doc("shared_column_num_parcels_sale") }}' + - name: outlier_reason + description: This is an array colum that contains all of the distinct outlier reasons between analyst review and our + algorithmic sales validation model. A nuance is that there are some outlier reasons that don't necessarily + cause the `is_outlier` status to be true. - name: pin description: '{{ doc("shared_column_pin") }}' - name: requires_field_check @@ -92,6 +96,9 @@ models: description: '{{ doc("shared_column_sale_price") }}' - name: seller_name description: '{{ doc("shared_column_seller_name") }}' + - name: source_is_outlier + description: This field tells us if the determination in `is_outlier` was sourced from analyst review or from our algorithmic + sales validation pipeline. - name: sv_is_heuristic_outlier description: '{{ doc("shared_column_sv_is_heuristic_outlier") }}' - name: sv_is_outlier From eaf6287c50062ae7159610731b66ae07c5d7f87d Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Thu, 15 Jan 2026 21:29:20 +0000 Subject: [PATCH 09/10] Adjust comment --- dbt/models/default/default.vw_pin_sale.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 01b457dc1..ce5884392 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -415,7 +415,7 @@ SELECT ) ) ) AS outlier_reason, - -- Logic that mirrots the is_outlier_field but lets us know explicity + -- Logic similar to the is_outlier_field but lets us know explicity -- if the is_outlier column determination is an analyst override or -- an algorithmic fallback. CASE From 530e5c04943fda270639212a833ca049374c583b Mon Sep 17 00:00:00 2001 From: Michael Wagner Date: Fri, 16 Jan 2026 15:48:19 +0000 Subject: [PATCH 10/10] Tweak comment --- dbt/models/default/default.vw_pin_sale.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index ce5884392..e84d10dd7 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -415,9 +415,9 @@ SELECT ) ) ) AS outlier_reason, - -- Logic similar to the is_outlier_field but lets us know explicity - -- if the is_outlier column determination is an analyst override or - -- an algorithmic fallback. + -- Logic similar to the is_outlier field but lets us know explicity + -- if the is_outlier column determination is sourced from an analyst + -- override or an algorithmic fallback. CASE WHEN flag_override.is_arms_length IS NOT NULL