diff --git a/dbt/models/default/default.vw_pin_sale.sql b/dbt/models/default/default.vw_pin_sale.sql index 14430d981..e84d10dd7 100644 --- a/dbt/models/default/default.vw_pin_sale.sql +++ b/dbt/models/default/default.vw_pin_sale.sql @@ -367,7 +367,69 @@ SELECT -- If there is no override, default to sv_is_outlier WHEN sales_val.sv_is_outlier IS NOT NULL THEN sales_val.sv_is_outlier - END AS is_outlier + END AS is_outlier, + -- Combined outlier reasons: manual override reasons + model SV reasons + ARRAY_DISTINCT( + CONCAT( + -- Manual analyst override triggers + FILTER( + ARRAY[ + IF( + COALESCE(flag_override.is_arms_length = FALSE, FALSE), + 'Analyst: Non-arms length' + ), + IF( + COALESCE(flag_override.is_flip = TRUE, FALSE), + 'Analyst: Flip' + ), + IF( + COALESCE(flag_override.has_class_change = TRUE, FALSE), + 'Analyst: Class change' + ), + IF( + COALESCE( + flag_override.has_characteristic_change + = 'yes_major', + FALSE + ), + 'Analyst: Characteristic change' + ), + IF( + COALESCE( + flag_override.requires_field_check = TRUE, FALSE + ), + 'Analyst: Requires field check' + ) + ], + r -> r IS NOT NULL + ), + + -- Sales val statistical model reasons (sv_outlier_reason1-3) + FILTER( + ARRAY[ + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason1), + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason2), + CONCAT('SV pipeline: ', sales_val.sv_outlier_reason3) + ], + r -> r IS NOT NULL AND TRIM(r) != 'SV pipeline:' + ) + ) + ) AS outlier_reason, + -- Logic similar to the is_outlier field but lets us know explicitly + -- if the is_outlier column determination is sourced from an analyst + -- override or an algorithmic fallback. 
+ CASE + WHEN + flag_override.is_arms_length IS NOT NULL + OR flag_override.is_flip IS NOT NULL + OR flag_override.has_class_change IS NOT NULL + OR flag_override.has_characteristic_change IS NOT NULL + OR flag_override.requires_field_check IS NOT NULL + THEN 'analyst' + + WHEN sales_val.sv_is_outlier IS NOT NULL + THEN 'algorithm' + END AS source_is_outlier FROM unique_sales LEFT JOIN mydec_sales ON unique_sales.doc_no = mydec_sales.doc_no diff --git a/dbt/models/default/schema/default.vw_pin_sale.yml b/dbt/models/default/schema/default.vw_pin_sale.yml index 9ce4e3ffd..1c7fa6208 100644 --- a/dbt/models/default/schema/default.vw_pin_sale.yml +++ b/dbt/models/default/schema/default.vw_pin_sale.yml @@ -25,11 +25,11 @@ models: description: Indicator for whether or not the observation uses the MyDec sale date - name: is_outlier description: | - The final determination indicating whether a sale is a statistical + The final determination indicating whether a sale is an outlier that should be excluded from model training. - Combines information from our sales validation model with information - from analysts who review our outliers for correctness. + Combines information from our statistical sales validation model with + information from analysts who review our outliers for correctness. - name: mydec_deed_type description: Deed type from MyDec, more granular than CCAO deed type - name: mydec_line_8_current_use @@ -72,6 +72,10 @@ models: description: '{{ doc("shared_column_nbhd_code") }}' - name: num_parcels_sale description: '{{ doc("shared_column_num_parcels_sale") }}' + - name: outlier_reason + description: This is an array column that contains all of the distinct outlier reasons between analyst review and our + algorithmic sales validation model. A nuance is that there are some outlier reasons that don't necessarily + cause the `is_outlier` status to be true. 
- name: pin description: '{{ doc("shared_column_pin") }}' - name: requires_field_check @@ -92,6 +96,9 @@ models: description: '{{ doc("shared_column_sale_price") }}' - name: seller_name description: '{{ doc("shared_column_seller_name") }}' + - name: source_is_outlier + description: This field tells us if the determination in `is_outlier` was sourced from analyst review or from our algorithmic + sales validation pipeline. Values are `analyst` or `algorithm`; null when neither source has a determination. - name: sv_is_heuristic_outlier description: '{{ doc("shared_column_sv_is_heuristic_outlier") }}' - name: sv_is_outlier