From db855a5e5c40f0750806e4da8e1c1d40652e2e3b Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Wed, 17 May 2023 17:00:16 +0200 Subject: [PATCH 1/6] add ties_okay and fix ties handling --- .../models/schema_tests/schema.yml | 66 +++++++++++++++++++ ..._column_most_common_value_to_be_in_set.sql | 24 +++++-- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index f9028f8..1d6af47 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -505,6 +505,23 @@ models: value_set: [0.5] top_n: 1 quote_values: false + # expect error if value is column but not most common + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [1] + top_n: 1 + quote_values: false + config: + error_if: "=0" + warn_if: "<>1" + # expect error if value is column but not most common and ties_okay is true + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [1] + top_n: 1 + ties_okay: true + quote_values: false + config: + error_if: "=0" + warn_if: "<>1" - dbt_expectations.expect_column_values_to_be_increasing: sort_column: col_numeric_a strictly: false @@ -538,6 +555,55 @@ models: - dbt_expectations.expect_column_values_to_not_be_in_set: value_set: ['a','c'] quote_values: true + # Expect error if not all most common values are in the set + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['b'] + top_n: 1 + config: + error_if: "=0" + warn_if: "<3" + # Expect success if not all most common values are in the set but ties_okay is set + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['b'] + top_n: 1 + ties_okay: true + # Expect error if none of the most common values are in the set and ties_okay is set + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['invalid_value'] + top_n: 1 + 
ties_okay: true + config: + error_if: "=0" + warn_if: "<4" + # Expect success if not all most common values are in the set but ties_okay is set + # and the set contains extra values + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['b', 'invalid_value'] + top_n: 1 + ties_okay: true + # Expect success if not all most common values are in the set but ties_okay is set + # and value is not first one of the column naturally ordered + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['ab'] + top_n: 1 + ties_okay: true + # Expect success if all most common values are in the set + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['b', 'ab', 'abc', 'abcd'] + top_n: 1 + # Expect success if all most common values are in the set + # and the set contains extra values + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['b', 'ab', 'abc', 'abcd', 'invalid_value'] + top_n: 1 + # Expect error if none of the most common values are in the set + # and the set contains extra values + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['invalid_value1', 'invalid_value2', 'invalid_value3', 'invalid_value4', 'invalid_value5'] + top_n: 1 + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_value_lengths_to_be_between: min_value: 1 max_value: 4 diff --git a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql index 11ef689..316193c 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql @@ -4,11 +4,12 @@ top_n, quote_values=True, data_type="decimal", - row_condition=None + row_condition=None, + ties_okay=False ) -%} {{ 
adapter.dispatch('test_expect_column_most_common_value_to_be_in_set', 'dbt_expectations') ( - model, column_name, value_set, top_n, quote_values, data_type, row_condition + model, column_name, value_set, top_n, quote_values, data_type, row_condition, ties_okay ) }} {%- endtest %} @@ -19,7 +20,8 @@ top_n, quote_values, data_type, - row_condition + row_condition, + ties_okay ) %} with value_counts as ( @@ -48,7 +50,7 @@ value_counts_ranked as ( select *, - row_number() over(order by value_count desc) as value_count_rank + rank() over(order by value_count desc) as value_count_rank from value_counts @@ -83,15 +85,23 @@ unique_set_values as ( set_values ), -validation_errors as ( - -- values from the model that are not in the set +most_common_values_not_in_set as ( select value_field from value_count_top_n where value_field not in (select value_field from unique_set_values) - +), +validation_errors as ( + {% if ties_okay -%} + select mcvnis.* from most_common_values_not_in_set mcvnis + , (select count(*) as cnt from most_common_values_not_in_set) as most_common_values_not_in_set_cnt + , (select count(*) as cnt from value_count_top_n) as most_common_values_cnt + where most_common_values_not_in_set_cnt.cnt >= most_common_values_cnt.cnt + {%- else -%} + select * from most_common_values_not_in_set + {%- endif -%} ) select * From 3e742e4c96f99134edae908bd44c26a33c0d4cd6 Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Thu, 18 May 2023 11:01:42 +0200 Subject: [PATCH 2/6] add ties_okay to readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 948e3e0..16af6e9 100644 --- a/README.md +++ b/README.md @@ -1011,6 +1011,8 @@ tests: quote_values: true # (Optional. Default is 'true'.) data_type: "decimal" # (Optional. Default is 'decimal') strictly: false # (Optional. Default is 'false'. Adds an 'or equal to' to the comparison operator for min/max) + ties_okay: true # (Optional. Default is 'false'. 
If true, the expectation will succeed if values outside + # the designated set are as common (but not more common) than designated values) ``` ### [expect_column_max_to_be_between](macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql) From 4a7501aae20909d635b3f091c5e727ff4dc365ab Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Fri, 19 May 2023 01:38:57 +0200 Subject: [PATCH 3/6] renamed ties param, updated formatting --- README.md | 8 ++-- .../models/schema_tests/schema.yml | 20 ++++---- ..._column_most_common_value_to_be_in_set.sql | 47 ++++++++++++++----- 3 files changed, 50 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 16af6e9..8c08981 100644 --- a/README.md +++ b/README.md @@ -1009,10 +1009,12 @@ tests: value_set: [0.5] top_n: 1 quote_values: true # (Optional. Default is 'true'.) - data_type: "decimal" # (Optional. Default is 'decimal') + data_type: "decimal" # (Optional. Default is adapter-specific equivalent of 'decimal' with a scale provided by dbt. + # Using decimal/numeric without scale might result in unexpected behaviour with Snowflake where scale + # defaults to 0 resulting in values being rounded) strictly: false # (Optional. Default is 'false'. Adds an 'or equal to' to the comparison operator for min/max) - ties_okay: true # (Optional. Default is 'false'. If true, the expectation will succeed if values outside - # the designated set are as common (but not more common) than designated values) + allow_ties: true # (Optional. Default is 'false'. 
If true, the expectation will succeed if values outside + # the designated set are as common (but not more common) than designated values) ``` ### [expect_column_max_to_be_between](macros/schema_tests/aggregate_functions/expect_column_max_to_be_between.sql) diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 1d6af47..7a9c908 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -513,11 +513,11 @@ models: config: error_if: "=0" warn_if: "<>1" - # expect error if value is column but not most common and ties_okay is true + # expect error if value is column but not most common and allow_ties is true - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [1] top_n: 1 - ties_okay: true + allow_ties: true quote_values: false config: error_if: "=0" @@ -562,31 +562,31 @@ models: config: error_if: "=0" warn_if: "<3" - # Expect success if not all most common values are in the set but ties_okay is set + # Expect success if not all most common values are in the set but allow_ties is set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['b'] top_n: 1 - ties_okay: true - # Expect error if none of the most common values are in the set and ties_okay is set + allow_ties: true + # Expect error if none of the most common values are in the set and allow_ties is set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['invalid_value'] top_n: 1 - ties_okay: true + allow_ties: true config: error_if: "=0" warn_if: "<4" - # Expect success if not all most common values are in the set but ties_okay is set + # Expect success if not all most common values are in the set but allow_ties is set # and the set contains extra values - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['b', 'invalid_value'] top_n: 1 - ties_okay: true - # Expect success if not all most common 
values are in the set but ties_okay is set + allow_ties: true + # Expect success if not all most common values are in the set but allow_ties is set # and value is not first one of the column naturally ordered - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['ab'] top_n: 1 - ties_okay: true + allow_ties: true # Expect success if all most common values are in the set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['b', 'ab', 'abc', 'abcd'] diff --git a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql index 316193c..4b4bee6 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql @@ -3,13 +3,15 @@ value_set, top_n, quote_values=True, - data_type="decimal", + data_type=None, row_condition=None, - ties_okay=False + allow_ties=False ) -%} - + {# For Snowflake, using a default 'decimal' instead of dbt.type_numeric() + rounds up the value when casting #} + {% set data_type = dbt.type_numeric() if not data_type else data_type %} {{ adapter.dispatch('test_expect_column_most_common_value_to_be_in_set', 'dbt_expectations') ( - model, column_name, value_set, top_n, quote_values, data_type, row_condition, ties_okay + model, column_name, value_set, top_n, quote_values, data_type, row_condition, allow_ties ) }} {%- endtest %} @@ -21,9 +23,9 @@ quote_values, data_type, row_condition, - ties_okay + allow_ties ) %} - +{% set data_type = data_type %} with value_counts as ( select @@ -93,14 +95,35 @@ most_common_values_not_in_set as ( where value_field not in (select value_field from unique_set_values) ), +most_common_values_in_set as ( + select + value_field + from + value_count_top_n + {{ dbt.except() }} + select + value_field + from + 
most_common_values_not_in_set +), validation_errors as ( - {% if ties_okay -%} - select mcvnis.* from most_common_values_not_in_set mcvnis - , (select count(*) as cnt from most_common_values_not_in_set) as most_common_values_not_in_set_cnt - , (select count(*) as cnt from value_count_top_n) as most_common_values_cnt - where most_common_values_not_in_set_cnt.cnt >= most_common_values_cnt.cnt + {% if allow_ties -%} + select + * + from + most_common_values_not_in_set + where + {# + If the intersection between the most common values and the values in the set is not empty, + succeed. Otherwise fail the test and select all the most common values from the column. + #} + ( + select count(*) + from most_common_values_in_set + ) = 0 {%- else -%} - select * from most_common_values_not_in_set + select * + from most_common_values_not_in_set {%- endif -%} ) From 8a1906b175ad5bdeb78204c5b962fbb3f4e9da39 Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Fri, 19 May 2023 16:16:44 +0200 Subject: [PATCH 4/6] fixed top_n issue --- README.md | 2 +- .../models/schema_tests/data_test.sql | 4 +- .../models/schema_tests/schema.yml | 66 +++++++++++++++---- ..._column_most_common_value_to_be_in_set.sql | 10 +-- 4 files changed, 63 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 8c08981..5318edb 100644 --- a/README.md +++ b/README.md @@ -1013,7 +1013,7 @@ tests: # Using decimal/numeric without scale might result in unexpected behaviour with Snowflake where scale # defaults to 0 resulting in values being rounded) strictly: false # (Optional. Default is 'false'. Adds an 'or equal to' to the comparison operator for min/max) - allow_ties: true # (Optional. Default is 'false'. If true, the expectation will succeed if values outside + ties_okay: true # (Optional. Default is 'false'. 
If true, the expectation will succeed if values outside # the designated set are as common (but not more common) than designated values) ``` diff --git a/integration_tests/models/schema_tests/data_test.sql b/integration_tests/models/schema_tests/data_test.sql index cca11ac..1802852 100644 --- a/integration_tests/models/schema_tests/data_test.sql +++ b/integration_tests/models/schema_tests/data_test.sql @@ -1,8 +1,8 @@ select 1 as idx, '2020-10-21' as date_col, - cast(0 as {{ dbt.type_float() }}) as col_numeric_a, - cast(1 as {{ dbt.type_float() }}) as col_numeric_b, + cast(0 as {{ dbt.type_numeric() }}) as col_numeric_a, + cast(1 as {{ dbt.type_numeric() }}) as col_numeric_b, 'a' as col_string_a, 'b' as col_string_b, cast(null as {{ dbt.type_string() }}) as col_null, diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 7a9c908..33f268e 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -505,7 +505,32 @@ models: value_set: [0.5] top_n: 1 quote_values: false - # expect error if value is column but not most common + # Expect success if all most common values are in set + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [0.5, 0, 1] + top_n: 2 + quote_values: false + # Expect failure if not all most common values are in set + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [0.5, 0] + top_n: 2 + quote_values: false + config: + error_if: "=0" + warn_if: "<>1" + # Expect success if some of the most common values are in set and ties_okay is true + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [0.5, 0] + top_n: 2 + ties_okay: true + quote_values: false + # Expect success if any of the top 2 most common levels value are in set and ties_okay is true + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [0] + top_n: 2 + ties_okay: 
true + quote_values: false + # Expect error if value is in column but not most common - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [1] top_n: 1 @@ -513,15 +538,24 @@ models: config: error_if: "=0" warn_if: "<>1" - # expect error if value is column but not most common and allow_ties is true + # Expect error if value is in column but not most common and ties_okay is true - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [1] top_n: 1 - allow_ties: true + ties_okay: true quote_values: false config: error_if: "=0" warn_if: "<>1" + # Expect error if value not in column at any level + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [123456789] + top_n: > + (select count(*) from {{ref('data_test')}}) + quote_values: false + config: + error_if: "=0" + warn_if: "<>3" - dbt_expectations.expect_column_values_to_be_increasing: sort_column: col_numeric_a strictly: false @@ -562,31 +596,31 @@ models: config: error_if: "=0" warn_if: "<3" - # Expect success if not all most common values are in the set but allow_ties is set + # Expect success if not all most common values are in the set but ties_okay is set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['b'] top_n: 1 - allow_ties: true - # Expect error if none of the most common values are in the set and allow_ties is set + ties_okay: true + # Expect error if none of the most common values are in the set and ties_okay is set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['invalid_value'] top_n: 1 - allow_ties: true + ties_okay: true config: error_if: "=0" warn_if: "<4" - # Expect success if not all most common values are in the set but allow_ties is set + # Expect success if not all most common values are in the set but ties_okay is set # and the set contains extra values - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['b', 'invalid_value'] top_n: 1 - 
allow_ties: true - # Expect success if not all most common values are in the set but allow_ties is set + ties_okay: true + # Expect success if not all most common values are in the set but ties_okay is set # and value is not first one of the column naturally ordered - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['ab'] top_n: 1 - allow_ties: true + ties_okay: true # Expect success if all most common values are in the set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: ['b', 'ab', 'abc', 'abcd'] @@ -604,6 +638,16 @@ models: config: error_if: "=0" warn_if: "<4" + # Expect error if none of the most common values are in the set + # and the set contains extra values + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: ['invalid_value1', 'invalid_value2', 'invalid_value3', 'invalid_value4', 'invalid_value5'] + top_n: > + (select count(*) from {{ref('data_test')}}) + ties_okay: true + config: + error_if: "=0" + warn_if: "<4" - dbt_expectations.expect_column_value_lengths_to_be_between: min_value: 1 max_value: 4 diff --git a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql index 4b4bee6..7b5f700 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql @@ -5,13 +5,13 @@ quote_values=True, data_type=None, row_condition=None, - allow_ties=False + ties_okay=False ) -%} {# For Snowflake, using a default 'decimal' instead of dbt.type_numeric() rounds up the value when casting #} {% set data_type = dbt.type_numeric() if not data_type else data_type %} {{ adapter.dispatch('test_expect_column_most_common_value_to_be_in_set', 'dbt_expectations') ( - model, column_name, value_set, top_n, quote_values, data_type, row_condition, 
allow_ties + model, column_name, value_set, top_n, quote_values, data_type, row_condition, ties_okay ) }} {%- endtest %} @@ -23,7 +23,7 @@ quote_values, data_type, row_condition, - allow_ties + ties_okay ) %} {% set data_type = data_type %} with value_counts as ( @@ -64,7 +64,7 @@ value_count_top_n as ( from value_counts_ranked where - value_count_rank = {{ top_n }} + value_count_rank <= {{ top_n }} ), set_values as ( @@ -107,7 +107,7 @@ most_common_values_in_set as ( most_common_values_not_in_set ), validation_errors as ( - {% if allow_ties -%} + {% if ties_okay -%} select * from From 6cdf6ce936aa0f2691d2d698f01b0940c5eb64db Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Fri, 19 May 2023 18:29:50 +0200 Subject: [PATCH 5/6] adds changes from fix/issue-258-claus --- integration_tests/models/schema_tests/data_test.sql | 12 ++++++------ integration_tests/models/schema_tests/schema.yml | 12 +++++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/integration_tests/models/schema_tests/data_test.sql b/integration_tests/models/schema_tests/data_test.sql index 1802852..db8bf23 100644 --- a/integration_tests/models/schema_tests/data_test.sql +++ b/integration_tests/models/schema_tests/data_test.sql @@ -13,8 +13,8 @@ union all select 2 as idx, '2020-10-22' as date_col, - 1 as col_numeric_a, - 0 as col_numeric_b, + cast(1 as {{ dbt.type_numeric() }}) as col_numeric_a, + cast(0 as {{ dbt.type_numeric() }}) as col_numeric_b, 'b' as col_string_a, 'ab' as col_string_b, null as col_null, @@ -25,8 +25,8 @@ union all select 3 as idx, '2020-10-23' as date_col, - 0.5 as col_numeric_a, - 0.5 as col_numeric_b, + cast(0.5 as {{ dbt.type_numeric() }}) as col_numeric_a, + cast(0.5 as {{ dbt.type_numeric() }}) as col_numeric_b, 'c' as col_string_a, 'abc' as col_string_b, null as col_null, @@ -37,8 +37,8 @@ union all select 4 as idx, '2020-10-23' as date_col, - 0.5 as col_numeric_a, - 0.5 as col_numeric_b, + cast(0.5 as {{ dbt.type_numeric() }}) as col_numeric_a, + 
cast(0.5 as {{ dbt.type_numeric() }}) as col_numeric_b, 'c' as col_string_a, 'abcd' as col_string_b, null as col_null, diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml index 33f268e..0928146 100644 --- a/integration_tests/models/schema_tests/schema.yml +++ b/integration_tests/models/schema_tests/schema.yml @@ -505,12 +505,12 @@ models: value_set: [0.5] top_n: 1 quote_values: false - # Expect success if all most common values are in set + # Expect success if all most common values at all n levels are in set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [0.5, 0, 1] top_n: 2 quote_values: false - # Expect failure if not all most common values are in set + # Expect failure if not all most common values at all n levels are in set - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [0.5, 0] top_n: 2 @@ -518,7 +518,7 @@ models: config: error_if: "=0" warn_if: "<>1" - # Expect success if some of the most common values are in set and ties_okay is true + # Expect success if some of the most common values at all n levels are in set and ties_okay is true - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [0.5, 0] top_n: 2 @@ -530,6 +530,12 @@ models: top_n: 2 ties_okay: true quote_values: false + # Expect success if any of the top most common level value is in set and ties_okay is true + - dbt_expectations.expect_column_most_common_value_to_be_in_set: + value_set: [0.5] + top_n: 2 + ties_okay: true + quote_values: false # Expect error if value is in column but not most common - dbt_expectations.expect_column_most_common_value_to_be_in_set: value_set: [1] From bbbffd253814190bb3ab07a8dd95094ff007fdf9 Mon Sep 17 00:00:00 2001 From: lookslikeitsnot Date: Wed, 24 May 2023 19:03:47 +0200 Subject: [PATCH 6/6] rephrase most common values ties_okay logic --- ...ect_column_most_common_value_to_be_in_set.sql | 16 ++++++++-------- 1 file changed, 
8 insertions(+), 8 deletions(-) diff --git a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql index 7b5f700..25ac36b 100644 --- a/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql +++ b/macros/schema_tests/aggregate_functions/expect_column_most_common_value_to_be_in_set.sql @@ -25,7 +25,6 @@ row_condition, ties_okay ) %} -{% set data_type = data_type %} with value_counts as ( select @@ -95,16 +94,17 @@ most_common_values_not_in_set as ( where value_field not in (select value_field from unique_set_values) ), +{# Get the partial matches for ties_okay #} most_common_values_in_set as ( select value_field from value_count_top_n - {{ dbt.except() }} + {{ dbt.intersect() }} select value_field from - most_common_values_not_in_set + unique_set_values ), validation_errors as ( {% if ties_okay -%} @@ -114,13 +114,13 @@ validation_errors as ( most_common_values_not_in_set where {# - If the intersection between the most common values and the values in the set is not empty, - succeed. Otherwise fail the test and select all the most common values from the column. + If the intersection between the most common values and the values in the set is not empty, succeed. + Otherwise fail the test and return every most common value that is not in the set. #} - ( - select count(*) + not exists ( + select 1 from most_common_values_in_set - ) = 0 + ) {%- else -%} select * from most_common_values_not_in_set