From 0102f4ba69163edede7e48ecf23bb7569e6ef376 Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Sun, 28 Sep 2025 16:49:41 +0200 Subject: [PATCH 1/6] #17801 Improve nullability reporting of case expressions --- datafusion/core/tests/tpcds_planning.rs | 3 +- datafusion/expr/src/expr_fn.rs | 5 + datafusion/expr/src/expr_schema.rs | 199 +++++++++++++- .../physical-expr/src/expressions/case.rs | 251 +++++++++++++++++- 4 files changed, 449 insertions(+), 9 deletions(-) diff --git a/datafusion/core/tests/tpcds_planning.rs b/datafusion/core/tests/tpcds_planning.rs index 252d76d0f9d9..bee3b48a574b 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ b/datafusion/core/tests/tpcds_planning.rs @@ -1052,9 +1052,10 @@ async fn regression_test(query_no: u8, create_physical: bool) -> Result<()> { for sql in &sql { let df = ctx.sql(sql).await?; let (state, plan) = df.into_parts(); - let plan = state.optimize(&plan)?; if create_physical { let _ = state.create_physical_plan(&plan).await?; + } else { + let _ = state.optimize(&plan)?; } } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4d8b94ba27ff..08ffab8e426b 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -340,6 +340,11 @@ pub fn is_null(expr: Expr) -> Expr { Expr::IsNull(Box::new(expr)) } +/// Create is not null expression +pub fn is_not_null(expr: Expr) -> Expr { + Expr::IsNotNull(Box::new(expr)) +} + /// Create is true expression pub fn is_true(expr: Expr) -> Expr { Expr::IsTrue(Box::new(expr)) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index e803e3534130..553882619252 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -32,6 +32,7 @@ use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, Result, Spans, TableReference, }; +use datafusion_expr_common::operator::Operator; use 
datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use std::sync::Arc; @@ -283,6 +284,11 @@ impl ExprSchemable for Expr { let then_nullable = case .when_then_expr .iter() + .filter(|(w, t)| { + // Disregard branches where we can determine statically that the predicate + // is always false when the then expression would evaluate to null + const_result_when_value_is_null(w, t).unwrap_or(true) + }) .map(|(_, t)| t.nullable(input_schema)) .collect::>>()?; if then_nullable.contains(&true) { @@ -647,6 +653,50 @@ impl ExprSchemable for Expr { } } +/// Determines if the given `predicate` can be const evaluated if `value` were to evaluate to `NULL`. +/// Returns a `Some` value containing the const result if so; otherwise returns `None`. +fn const_result_when_value_is_null(predicate: &Expr, value: &Expr) -> Option { + match predicate { + Expr::IsNotNull(e) => { + if e.as_ref().eq(value) { + Some(false) + } else { + None + } + } + Expr::IsNull(e) => { + if e.as_ref().eq(value) { + Some(true) + } else { + None + } + } + Expr::Not(e) => const_result_when_value_is_null(e, value).map(|b| !b), + Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { + Operator::And => { + let l = const_result_when_value_is_null(left, value); + let r = const_result_when_value_is_null(right, value); + match (l, r) { + (Some(l), Some(r)) => Some(l && r), + (Some(l), None) => Some(l), + (None, Some(r)) => Some(r), + _ => None, + } + } + Operator::Or => { + let l = const_result_when_value_is_null(left, value); + let r = const_result_when_value_is_null(right, value); + match (l, r) { + (Some(l), Some(r)) => Some(l || r), + _ => None, + } + } + _ => None, + }, + _ => None, + } +} + impl Expr { /// Common method for window functions that applies type coercion /// to all arguments of the window function to check if it matches @@ -777,7 +827,10 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> 
Result MockExprSchema, + ) -> Result<()> { + assert_eq!( + expr.nullable(&get_schema(true))?, + nullable, + "Nullability of '{expr}' should be {nullable} when column is nullable" + ); + assert!( + !expr.nullable(&get_schema(false))?, + "Nullability of '{expr}' should be false when column is not nullable" + ); + Ok(()) + } + + #[test] + fn test_case_expression_nullability() -> Result<()> { + let get_schema = |nullable| { + MockExprSchema::new() + .with_data_type(DataType::Int32) + .with_nullable(nullable) + }; + + check_nullability( + when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?, + false, + get_schema, + )?; + + check_nullability( + when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?, + false, + get_schema, + )?; + + check_nullability( + when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo")) + .otherwise(lit(0))?, + true, + get_schema, + )?; + + check_nullability( + when( + and( + is_not_null(col("foo")), + binary_expr(col("foo"), Operator::Eq, lit(5)), + ), + col("foo"), + ) + .otherwise(lit(0))?, + false, + get_schema, + )?; + + check_nullability( + when( + and( + binary_expr(col("foo"), Operator::Eq, lit(5)), + is_not_null(col("foo")), + ), + col("foo"), + ) + .otherwise(lit(0))?, + false, + get_schema, + )?; + + check_nullability( + when( + or( + is_not_null(col("foo")), + binary_expr(col("foo"), Operator::Eq, lit(5)), + ), + col("foo"), + ) + .otherwise(lit(0))?, + true, + get_schema, + )?; + + check_nullability( + when( + or( + binary_expr(col("foo"), Operator::Eq, lit(5)), + is_not_null(col("foo")), + ), + col("foo"), + ) + .otherwise(lit(0))?, + true, + get_schema, + )?; + + check_nullability( + when( + or( + is_not_null(col("foo")), + binary_expr(col("foo"), Operator::Eq, lit(5)), + ), + col("foo"), + ) + .otherwise(lit(0))?, + true, + get_schema, + )?; + + check_nullability( + when( + or( + binary_expr(col("foo"), Operator::Eq, lit(5)), + is_not_null(col("foo")), + ), + col("foo"), + ) + .otherwise(lit(0))?, + true, + 
get_schema, + )?; + + check_nullability( + when( + or( + and( + binary_expr(col("foo"), Operator::Eq, lit(5)), + is_not_null(col("foo")), + ), + and( + binary_expr(col("foo"), Operator::Eq, col("bar")), + is_not_null(col("foo")), + ), + ), + col("foo"), + ) + .otherwise(lit(0))?, + false, + get_schema, + )?; + + Ok(()) + } + #[test] fn test_inlist_nullability() { let get_schema = |nullable| { diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 65a210826664..5dbcecdf224c 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -15,12 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::expressions::try_cast; +use crate::expressions::{try_cast, BinaryExpr, IsNotNullExpr, IsNullExpr, NotExpr}; use crate::PhysicalExpr; -use std::borrow::Cow; -use std::hash::Hash; -use std::{any::Any, sync::Arc}; - use arrow::array::*; use arrow::compute::kernels::zip::zip; use arrow::compute::{and, and_not, is_null, not, nullif, or, prep_null_mask_filter}; @@ -30,8 +26,12 @@ use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, DataFusionError, Result, ScalarValue, }; use datafusion_expr::ColumnarValue; +use std::borrow::Cow; +use std::hash::Hash; +use std::{any::Any, sync::Arc}; use super::{Column, Literal}; +use datafusion_expr_common::operator::Operator; use datafusion_physical_expr_common::datum::compare_with_eq; use itertools::Itertools; @@ -481,6 +481,11 @@ impl PhysicalExpr for CaseExpr { let then_nullable = self .when_then_expr .iter() + .filter(|(w, t)| { + // Disregard branches where we can determine statically that the predicate + // is always false when the then expression would evaluate to null + const_result_when_value_is_null(w.as_ref(), t.as_ref()).unwrap_or(true) + }) .map(|(_, t)| t.nullable(input_schema)) .collect::>>()?; if then_nullable.contains(&true) { @@ -588,6 
+593,54 @@ impl PhysicalExpr for CaseExpr { } } +/// Determines if the given `predicate` can be const evaluated if `value` were to evaluate to `NULL`. +/// Returns a `Some` value containing the const result if so; otherwise returns `None`. +fn const_result_when_value_is_null( + predicate: &dyn PhysicalExpr, + value: &dyn PhysicalExpr, +) -> Option { + let predicate_any = predicate.as_any(); + if let Some(not_null) = predicate_any.downcast_ref::() { + if not_null.arg().as_ref().dyn_eq(value) { + Some(false) + } else { + None + } + } else if let Some(null) = predicate_any.downcast_ref::() { + if null.arg().as_ref().dyn_eq(value) { + Some(true) + } else { + None + } + } else if let Some(not) = predicate_any.downcast_ref::() { + const_result_when_value_is_null(not.arg().as_ref(), value).map(|b| !b) + } else if let Some(binary) = predicate_any.downcast_ref::() { + match binary.op() { + Operator::And => { + let l = const_result_when_value_is_null(binary.left().as_ref(), value); + let r = const_result_when_value_is_null(binary.right().as_ref(), value); + match (l, r) { + (Some(l), Some(r)) => Some(l && r), + (Some(l), None) => Some(l), + (None, Some(r)) => Some(r), + _ => None, + } + } + Operator::Or => { + let l = const_result_when_value_is_null(binary.left().as_ref(), value); + let r = const_result_when_value_is_null(binary.right().as_ref(), value); + match (l, r) { + (Some(l), Some(r)) => Some(l || r), + _ => None, + } + } + _ => None, + } + } else { + None + } +} + /// Create a CASE expression pub fn case( expr: Option>, @@ -601,7 +654,8 @@ pub fn case( mod tests { use super::*; - use crate::expressions::{binary, cast, col, lit, BinaryExpr}; + use crate::expressions; + use crate::expressions::{binary, cast, col, is_not_null, lit, BinaryExpr}; use arrow::buffer::Buffer; use arrow::datatypes::DataType::Float64; use arrow::datatypes::Field; @@ -609,7 +663,6 @@ mod tests { use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TransformedResult, 
TreeNode}; use datafusion_expr::type_coercion::binary::comparison_coercion; - use datafusion_expr::Operator; use datafusion_physical_expr_common::physical_expr::fmt_sql; #[test] @@ -1436,4 +1489,188 @@ mod tests { Ok(()) } + + fn when_then_else( + when: &Arc, + then: &Arc, + els: &Arc, + ) -> Result> { + let case = CaseExpr::try_new( + None, + vec![(Arc::clone(when), Arc::clone(then))], + Some(Arc::clone(els)), + )?; + Ok(Arc::new(case)) + } + + #[test] + fn test_case_expression_nullability_with_nullable_column() -> Result<()> { + case_expression_nullability(true) + } + + #[test] + fn test_case_expression_nullability_with_not_nullable_column() -> Result<()> { + case_expression_nullability(false) + } + + fn case_expression_nullability(col_is_nullable: bool) -> Result<()> { + let schema = + Schema::new(vec![Field::new("foo", DataType::Int32, col_is_nullable)]); + + let foo = col("foo", &schema)?; + let foo_is_not_null = is_not_null(Arc::clone(&foo))?; + let foo_is_null = expressions::is_null(Arc::clone(&foo))?; + let not_foo_is_null = expressions::not(Arc::clone(&foo_is_null))?; + let zero = lit(0); + let foo_eq_zero = + binary(Arc::clone(&foo), Operator::Eq, Arc::clone(&zero), &schema)?; + + assert_not_nullable(when_then_else(&foo_is_not_null, &foo, &zero)?, &schema); + assert_not_nullable(when_then_else(¬_foo_is_null, &foo, &zero)?, &schema); + assert_nullability( + when_then_else(&foo_eq_zero, &foo, &zero)?, + &schema, + col_is_nullable, + ); + + assert_not_nullable( + when_then_else( + &binary( + Arc::clone(&foo_is_not_null), + Operator::And, + Arc::clone(&foo_eq_zero), + &schema, + )?, + &foo, + &zero, + )?, + &schema, + ); + + assert_not_nullable( + when_then_else( + &binary( + Arc::clone(&foo_eq_zero), + Operator::And, + Arc::clone(&foo_is_not_null), + &schema, + )?, + &foo, + &zero, + )?, + &schema, + ); + + assert_nullability( + when_then_else( + &binary( + Arc::clone(&foo_is_not_null), + Operator::Or, + Arc::clone(&foo_eq_zero), + &schema, + )?, + &foo, + 
&zero, + )?, + &schema, + col_is_nullable, + ); + + assert_nullability( + when_then_else( + &binary( + Arc::clone(&foo_eq_zero), + Operator::Or, + Arc::clone(&foo_is_not_null), + &schema, + )?, + &foo, + &zero, + )?, + &schema, + col_is_nullable, + ); + + assert_nullability( + when_then_else( + &binary( + Arc::clone(&foo_is_not_null), + Operator::Or, + Arc::clone(&foo_eq_zero), + &schema, + )?, + &foo, + &zero, + )?, + &schema, + col_is_nullable, + ); + + assert_nullability( + when_then_else( + &binary( + binary(Arc::clone(&foo), Operator::Eq, Arc::clone(&zero), &schema)?, + Operator::Or, + Arc::clone(&foo_is_not_null), + &schema, + )?, + &foo, + &zero, + )?, + &schema, + col_is_nullable, + ); + + assert_not_nullable( + when_then_else( + &binary( + binary( + binary( + Arc::clone(&foo), + Operator::Eq, + Arc::clone(&zero), + &schema, + )?, + Operator::And, + Arc::clone(&foo_is_not_null), + &schema, + )?, + Operator::Or, + binary( + binary( + Arc::clone(&foo), + Operator::Eq, + Arc::clone(&foo), + &schema, + )?, + Operator::And, + Arc::clone(&foo_is_not_null), + &schema, + )?, + &schema, + )?, + &foo, + &zero, + )?, + &schema, + ); + + Ok(()) + } + + fn assert_not_nullable(expr: Arc, schema: &Schema) { + assert!(!expr.nullable(schema).unwrap()); + } + + fn assert_nullable(expr: Arc, schema: &Schema) { + assert!(expr.nullable(schema).unwrap()); + } + + fn assert_nullability(expr: Arc, schema: &Schema, nullable: bool) { + if nullable { + assert_nullable(expr, schema); + } else { + assert_not_nullable(expr, schema); + } + } } From 753715e67097371991ec86dde1a46d8e0f571377 Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Mon, 29 Sep 2025 19:10:02 +0200 Subject: [PATCH 2/6] Clarify logical expression test cases --- datafusion/expr/src/expr_schema.rs | 212 +++++++++++------------------ 1 file changed, 76 insertions(+), 136 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 553882619252..8bc92cb2b84a 100644 --- 
a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,6 +28,7 @@ use crate::udf::ReturnFieldArgs; use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, FieldRef}; +use datafusion_common::tree_node::TreeNode; use datafusion_common::{ not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, Result, Spans, TableReference, @@ -827,10 +828,7 @@ pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result MockExprSchema, - ) -> Result<()> { + fn assert_nullability(expr: &Expr, schema: &dyn ExprSchema, nullable: bool) { assert_eq!( - expr.nullable(&get_schema(true))?, + expr.nullable(schema).unwrap(), nullable, - "Nullability of '{expr}' should be {nullable} when column is nullable" - ); - assert!( - !expr.nullable(&get_schema(false))?, - "Nullability of '{expr}' should be false when column is not nullable" + "Nullability of '{expr}' should be {nullable}" ); - Ok(()) + } + + fn assert_not_nullable(expr: &Expr, schema: &dyn ExprSchema) { + assert_nullability(expr, schema, false); + } + + fn assert_nullable(expr: &Expr, schema: &dyn ExprSchema) { + assert_nullability(expr, schema, true); } #[test] fn test_case_expression_nullability() -> Result<()> { - let get_schema = |nullable| { - MockExprSchema::new() - .with_data_type(DataType::Int32) - .with_nullable(nullable) - }; + let nullable_schema = MockExprSchema::new() + .with_data_type(DataType::Int32) + .with_nullable(true); - check_nullability( - when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?, - false, - get_schema, - )?; - - check_nullability( - when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?, - false, - get_schema, - )?; - - check_nullability( - when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo")) - .otherwise(lit(0))?, - true, - get_schema, - )?; - - check_nullability( - when( - and( - is_not_null(col("foo")), - 
binary_expr(col("foo"), Operator::Eq, lit(5)), - ), - col("foo"), - ) - .otherwise(lit(0))?, - false, - get_schema, - )?; - - check_nullability( - when( - and( - binary_expr(col("foo"), Operator::Eq, lit(5)), - is_not_null(col("foo")), - ), - col("foo"), - ) - .otherwise(lit(0))?, - false, - get_schema, - )?; - - check_nullability( - when( - or( - is_not_null(col("foo")), - binary_expr(col("foo"), Operator::Eq, lit(5)), - ), - col("foo"), - ) - .otherwise(lit(0))?, - true, - get_schema, - )?; - - check_nullability( - when( - or( - binary_expr(col("foo"), Operator::Eq, lit(5)), - is_not_null(col("foo")), - ), - col("foo"), - ) - .otherwise(lit(0))?, - true, - get_schema, - )?; - - check_nullability( - when( - or( - is_not_null(col("foo")), - binary_expr(col("foo"), Operator::Eq, lit(5)), - ), - col("foo"), - ) - .otherwise(lit(0))?, - true, - get_schema, - )?; - - check_nullability( - when( - or( - binary_expr(col("foo"), Operator::Eq, lit(5)), - is_not_null(col("foo")), - ), - col("foo"), - ) - .otherwise(lit(0))?, - true, - get_schema, - )?; - - check_nullability( - when( - or( - and( - binary_expr(col("foo"), Operator::Eq, lit(5)), - is_not_null(col("foo")), - ), - and( - binary_expr(col("foo"), Operator::Eq, col("bar")), - is_not_null(col("foo")), - ), - ), - col("foo"), - ) - .otherwise(lit(0))?, - false, - get_schema, - )?; + let not_nullable_schema = MockExprSchema::new() + .with_data_type(DataType::Int32) + .with_nullable(false); + + // CASE WHEN x IS NOT NULL THEN x ELSE 0 + let e1 = when(col("x").is_not_null(), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e1, &nullable_schema); + assert_not_nullable(&e1, ¬_nullable_schema); + + // CASE WHEN NOT x IS NULL THEN x ELSE 0 + let e2 = when(not(col("x").is_null()), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e2, &nullable_schema); + assert_not_nullable(&e2, ¬_nullable_schema); + + // CASE WHEN X = 5 THEN x ELSE 0 + let e3 = when(col("x").eq(lit(5)), col("x")).otherwise(lit(0))?; + 
assert_nullable(&e3, &nullable_schema); + assert_not_nullable(&e3, ¬_nullable_schema); + + // CASE WHEN x IS NOT NULL AND x = 5 THEN x ELSE 0 + let e4 = when(and(col("x").is_not_null(), col("x").eq(lit(5))), col("x")) + .otherwise(lit(0))?; + assert_not_nullable(&e4, &nullable_schema); + assert_not_nullable(&e4, ¬_nullable_schema); + + // CASE WHEN x = 5 AND x IS NOT NULL THEN x ELSE 0 + let e5 = when(and(col("x").eq(lit(5)), col("x").is_not_null()), col("x")) + .otherwise(lit(0))?; + assert_not_nullable(&e5, &nullable_schema); + assert_not_nullable(&e5, ¬_nullable_schema); + + // CASE WHEN x IS NOT NULL OR x = 5 THEN x ELSE 0 + let e6 = when(or(col("x").is_not_null(), col("x").eq(lit(5))), col("x")) + .otherwise(lit(0))?; + assert_nullable(&e6, &nullable_schema); + assert_not_nullable(&e6, ¬_nullable_schema); + + // CASE WHEN x = 5 OR x IS NOT NULL THEN x ELSE 0 + let e7 = when(or(col("x").eq(lit(5)), col("x").is_not_null()), col("x")) + .otherwise(lit(0))?; + assert_nullable(&e7, &nullable_schema); + assert_not_nullable(&e7, ¬_nullable_schema); + + // CASE WHEN (x = 5 AND x IS NOT NULL) OR (x = bar AND x IS NOT NULL) THEN x ELSE 0 + let e8 = when( + or( + and(col("x").eq(lit(5)), col("x").is_not_null()), + and(col("x").eq(col("bar")), col("x").is_not_null()), + ), + col("x"), + ) + .otherwise(lit(0))?; + assert_not_nullable(&e8, &nullable_schema); + assert_not_nullable(&e8, ¬_nullable_schema); + + // CASE WHEN x = 5 OR x IS NULL THEN x ELSE 0 + let e9 = when(or(col("x").eq(lit(5)), col("x").is_null()), col("x")) + .otherwise(lit(0))?; + assert_nullable(&e9, &nullable_schema); + assert_not_nullable(&e9, ¬_nullable_schema); Ok(()) } From c9190d6ebfc0fd2b339ccda58732af89453c370d Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Tue, 30 Sep 2025 15:35:18 +0200 Subject: [PATCH 3/6] WIP --- datafusion/expr/src/expr_schema.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs index 8bc92cb2b84a..44b541217fe2 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -285,12 +285,27 @@ impl ExprSchemable for Expr { let then_nullable = case .when_then_expr .iter() - .filter(|(w, t)| { - // Disregard branches where we can determine statically that the predicate - // is always false when the then expression would evaluate to null - const_result_when_value_is_null(w, t).unwrap_or(true) + .filter_map(|(w, t)| { + let then_is_nullable = t.nullable(input_schema); + match then_is_nullable { + // Branches with a then expressions that is not nullable can be skipped + Ok(false) => None, + // Pass error determining nullability on verbatim + err @ Err(_) => Some(err), + // For branches with a nullable then expressions try to determine + // statically if the predicate prevents null from being returned. + Ok(true) => match const_result_when_value_is_null(w, t) { + // Static analysis did not provide an answer; assume nullable + None => Some(Ok(true)), + + Some(true) => Some(Ok(true)), + // We can prove that the predicate will always be false if the + // then branch were to evaluate to null. The most common pattern for + // this is `WHEN x IS NOT NULL THEN x`. 
If x + Some(false) => None, + }, + } }) - .map(|(_, t)| t.nullable(input_schema)) .collect::>>()?; if then_nullable.contains(&true) { Ok(true) From 73a030e344e77b04c82071a47ee8c8cf6204ddf4 Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Tue, 30 Sep 2025 17:20:07 +0200 Subject: [PATCH 4/6] #17838 Attempt to clarify const evaluation logic --- datafusion/expr/src/expr_schema.rs | 84 +++++++++++++++++------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 44b541217fe2..6c9dda23c318 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,11 +28,7 @@ use crate::udf::ReturnFieldArgs; use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_common::tree_node::TreeNode; -use datafusion_common::{ - not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, - Result, Spans, TableReference, -}; +use datafusion_common::{not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, Result, ScalarValue, Spans, TableReference}; use datafusion_expr_common::operator::Operator; use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer; use datafusion_functions_window_common::field::WindowUDFFieldArgs; @@ -281,33 +277,32 @@ impl ExprSchemable for Expr { Expr::OuterReferenceColumn(field, _) => Ok(field.is_nullable()), Expr::Literal(value, _) => Ok(value.is_null()), Expr::Case(case) => { - // This expression is nullable if any of the input expressions are nullable + // This expression is nullable if any of the then expressions are nullable let then_nullable = case .when_then_expr .iter() .filter_map(|(w, t)| { - let then_is_nullable = t.nullable(input_schema); - match then_is_nullable { + match t.nullable(input_schema) { // Branches with a then expressions that 
is not nullable can be skipped Ok(false) => None, // Pass error determining nullability on verbatim err @ Err(_) => Some(err), // For branches with a nullable then expressions try to determine - // statically if the predicate prevents null from being returned. - Ok(true) => match const_result_when_value_is_null(w, t) { - // Static analysis did not provide an answer; assume nullable - None => Some(Ok(true)), - - Some(true) => Some(Ok(true)), - // We can prove that the predicate will always be false if the - // then branch were to evaluate to null. The most common pattern for - // this is `WHEN x IS NOT NULL THEN x`. If x + // using limited const evaluation if the branch will be taken when + // the then expression evaluates to null. + Ok(true) => match const_result_when_value_is_null(w, t, input_schema) { + // Const evaluation was inconclusive or determined the branch would + // be taken + None | Some(true) => Some(Ok(true)), + // Const evaluation proves the branch will never be taken. + // The most common pattern for this is + // `WHEN x IS NOT NULL THEN x`. Some(false) => None, }, } }) .collect::>>()?; - if then_nullable.contains(&true) { + if !then_nullable.is_empty() { Ok(true) } else if let Some(e) = &case.else_expr { e.nullable(input_schema) @@ -671,27 +666,23 @@ impl ExprSchemable for Expr { /// Determines if the given `predicate` can be const evaluated if `value` were to evaluate to `NULL`. /// Returns a `Some` value containing the const result if so; otherwise returns `None`. 
-fn const_result_when_value_is_null(predicate: &Expr, value: &Expr) -> Option { +fn const_result_when_value_is_null(predicate: &Expr, value: &Expr, input_schema: &dyn ExprSchema) -> Option { match predicate { Expr::IsNotNull(e) => { - if e.as_ref().eq(value) { - Some(false) - } else { - None - } + // If `e` is null, then `e IS NOT NULL` is false + // If `e` is not null, then `e IS NOT NULL` is true + is_null(e, value, input_schema).map(|is_null| !is_null) } Expr::IsNull(e) => { - if e.as_ref().eq(value) { - Some(true) - } else { - None - } + // If `e` is null, then `e IS NULL` is true + // If `e` is not null, then `e IS NULL` is false + is_null(e, value, input_schema) } - Expr::Not(e) => const_result_when_value_is_null(e, value).map(|b| !b), + Expr::Not(e) => const_result_when_value_is_null(e, value, input_schema).map(|b| !b), Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { Operator::And => { - let l = const_result_when_value_is_null(left, value); - let r = const_result_when_value_is_null(right, value); + let l = const_result_when_value_is_null(left, value, input_schema); + let r = const_result_when_value_is_null(right, value, input_schema); match (l, r) { (Some(l), Some(r)) => Some(l && r), (Some(l), None) => Some(l), @@ -700,8 +691,8 @@ fn const_result_when_value_is_null(predicate: &Expr, value: &Expr) -> Option { - let l = const_result_when_value_is_null(left, value); - let r = const_result_when_value_is_null(right, value); + let l = const_result_when_value_is_null(left, value, input_schema); + let r = const_result_when_value_is_null(right, value, input_schema); match (l, r) { (Some(l), Some(r)) => Some(l || r), _ => None, @@ -713,6 +704,25 @@ fn const_result_when_value_is_null(predicate: &Expr, value: &Expr) -> Option Option { + // We're assuming `value` is null + if expr.eq(value) { + return Some(true); + } + + match expr { + // Literal null is obviously null + Expr::Literal(ScalarValue::Null, _) => Some(true), + // We're assuming `value` is 
null + _ => match expr.nullable(input_schema) { + // If `expr` is not nullable, we can be certain `expr` is not null + Ok(false) => Some(false), + // Otherwise inconclusive + _ => None, + } + } +} + impl Expr { /// Common method for window functions that applies type coercion /// to all arguments of the window function to check if it matches @@ -896,11 +906,11 @@ mod tests { assert!(expr.nullable(&get_schema(false)).unwrap()); } - fn assert_nullability(expr: &Expr, schema: &dyn ExprSchema, nullable: bool) { + fn assert_nullability(expr: &Expr, schema: &dyn ExprSchema, expected: bool) { assert_eq!( expr.nullable(schema).unwrap(), - nullable, - "Nullability of '{expr}' should be {nullable}" + expected, + "Nullability of '{expr}' should be {expected}" ); } From 7ad94e1171b4293afaae27bb61c84c4a7d1c94c3 Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Tue, 30 Sep 2025 20:27:15 +0200 Subject: [PATCH 5/6] #17838 Extend predicate const evaluation --- datafusion/expr/src/expr_schema.rs | 390 ++++++++++++++++++++++------- 1 file changed, 301 insertions(+), 89 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 6c9dda23c318..ec2344f962f2 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,7 +28,10 @@ use crate::udf::ReturnFieldArgs; use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field, FieldRef}; -use datafusion_common::{not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, Result, ScalarValue, Spans, TableReference}; +use datafusion_common::{ + not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, + Result, ScalarValue, Spans, TableReference, +}; use datafusion_expr_common::operator::Operator; use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer; use 
datafusion_functions_window_common::field::WindowUDFFieldArgs; @@ -278,31 +281,43 @@ impl ExprSchemable for Expr { Expr::Literal(value, _) => Ok(value.is_null()), Expr::Case(case) => { // This expression is nullable if any of the then expressions are nullable - let then_nullable = case + let any_nullable_thens = !case .when_then_expr .iter() .filter_map(|(w, t)| { match t.nullable(input_schema) { - // Branches with a then expressions that is not nullable can be skipped + // Branches with a then expression that is not nullable can be skipped Ok(false) => None, // Pass error determining nullability on verbatim - err @ Err(_) => Some(err), + Err(e) => Some(Err(e)), // For branches with a nullable then expressions try to determine // using limited const evaluation if the branch will be taken when // the then expression evaluates to null. - Ok(true) => match const_result_when_value_is_null(w, t, input_schema) { - // Const evaluation was inconclusive or determined the branch would - // be taken - None | Some(true) => Some(Ok(true)), - // Const evaluation proves the branch will never be taken. - // The most common pattern for this is - // `WHEN x IS NOT NULL THEN x`. - Some(false) => None, - }, + Ok(true) => { + let const_result = WhenThenConstEvaluator { + then_expr: t, + input_schema, + } + .const_eval_predicate(w); + + match const_result { + // Const evaluation was inconclusive or determined the branch + // would be taken + None | Some(TriStateBool::True) => Some(Ok(())), + // Const evaluation proves the branch will never be taken. + // The most common pattern for this is + // `WHEN x IS NOT NULL THEN x`. + Some(TriStateBool::False) + | Some(TriStateBool::Uncertain) => None, + } + } } }) - .collect::>>()?; - if !then_nullable.is_empty() { + .collect::>>()? 
+ .is_empty(); + + if any_nullable_thens { + // There is at least one reachable nullable then Ok(true) } else if let Some(e) = &case.else_expr { e.nullable(input_schema) @@ -664,61 +679,223 @@ impl ExprSchemable for Expr { } } -/// Determines if the given `predicate` can be const evaluated if `value` were to evaluate to `NULL`. -/// Returns a `Some` value containing the const result if so; otherwise returns `None`. -fn const_result_when_value_is_null(predicate: &Expr, value: &Expr, input_schema: &dyn ExprSchema) -> Option { - match predicate { - Expr::IsNotNull(e) => { - // If `e` is null, then `e IS NOT NULL` is false - // If `e` is not null, then `e IS NOT NULL` is true - is_null(e, value, input_schema).map(|is_null| !is_null) - } - Expr::IsNull(e) => { - // If `e` is null, then `e IS NULL` is true - // If `e` is not null, then `e IS NULL` is false - is_null(e, value, input_schema) - } - Expr::Not(e) => const_result_when_value_is_null(e, value, input_schema).map(|b| !b), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { - Operator::And => { - let l = const_result_when_value_is_null(left, value, input_schema); - let r = const_result_when_value_is_null(right, value, input_schema); - match (l, r) { - (Some(l), Some(r)) => Some(l && r), - (Some(l), None) => Some(l), - (None, Some(r)) => Some(r), +enum TriStateBool { + True, + False, + Uncertain, +} + +struct WhenThenConstEvaluator<'a> { + then_expr: &'a Expr, + input_schema: &'a dyn ExprSchema, +} + +impl WhenThenConstEvaluator<'_> { + /// Attempts to const evaluate the given predicate. + /// Returns a `Some` value containing the const result if so; otherwise returns `None`. 
+ fn const_eval_predicate(&self, predicate: &Expr) -> Option<TriStateBool> { + match predicate { + // Literal null is equivalent to boolean uncertain + Expr::Literal(ScalarValue::Null, _) => Some(TriStateBool::Uncertain), + Expr::IsNotNull(e) => { + if let Ok(false) = e.nullable(self.input_schema) { + // If `e` is not nullable, then `e IS NOT NULL` is always true + return Some(TriStateBool::True); + } + + match e.get_type(self.input_schema) { + Ok(DataType::Boolean) => match self.const_eval_predicate(e) { + Some(TriStateBool::True) | Some(TriStateBool::False) => { + Some(TriStateBool::True) + } + Some(TriStateBool::Uncertain) => Some(TriStateBool::False), + None => None, + }, + Ok(_) => match self.is_null(e) { + Some(true) => Some(TriStateBool::False), + Some(false) => Some(TriStateBool::True), + None => None, + }, + Err(_) => None, + } + } + Expr::IsNull(e) => { + if let Ok(false) = e.nullable(self.input_schema) { + // If `e` is not nullable, then `e IS NULL` is always false + return Some(TriStateBool::False); + } + + match e.get_type(self.input_schema) { + Ok(DataType::Boolean) => match self.const_eval_predicate(e) { + Some(TriStateBool::True) | Some(TriStateBool::False) => { + Some(TriStateBool::False) + } + Some(TriStateBool::Uncertain) => Some(TriStateBool::True), + None => None, + }, + Ok(_) => match self.is_null(e) { + Some(true) => Some(TriStateBool::True), + Some(false) => Some(TriStateBool::False), + None => None, + }, + Err(_) => None, + } + } + Expr::IsTrue(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::True) => Some(TriStateBool::True), + Some(_) => Some(TriStateBool::False), + _ => None, + }, + Expr::IsNotTrue(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::True) => Some(TriStateBool::False), + Some(_) => Some(TriStateBool::True), + _ => None, + }, + Expr::IsFalse(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::False) => Some(TriStateBool::True), + Some(_) => Some(TriStateBool::False), + _ => None, + }, +
Expr::IsNotFalse(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::False) => Some(TriStateBool::False), + Some(_) => Some(TriStateBool::True), + _ => None, + }, + Expr::IsUnknown(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::Uncertain) => Some(TriStateBool::True), + Some(_) => Some(TriStateBool::False), + _ => None, + }, + Expr::IsNotUnknown(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::Uncertain) => Some(TriStateBool::False), + Some(_) => Some(TriStateBool::True), + _ => None, + }, + Expr::Like(Like { expr, pattern, .. }) => { + match (self.is_null(expr), self.is_null(pattern)) { + (Some(true), _) | (_, Some(true)) => Some(TriStateBool::Uncertain), _ => None, } } - Operator::Or => { - let l = const_result_when_value_is_null(left, value, input_schema); - let r = const_result_when_value_is_null(right, value, input_schema); - match (l, r) { - (Some(l), Some(r)) => Some(l || r), + Expr::SimilarTo(Like { expr, pattern, .. }) => { + match (self.is_null(expr), self.is_null(pattern)) { + (Some(true), _) | (_, Some(true)) => Some(TriStateBool::Uncertain), _ => None, } } - _ => None, - }, - _ => None, - } -} - -fn is_null(expr: &Expr, value: &Expr, input_schema: &dyn ExprSchema) -> Option { - // We're assuming `value` is null - if expr.eq(value) { - return Some(true); + Expr::Between(Between { + expr, low, high, .. 
+ }) => match (self.is_null(expr), self.is_null(low), self.is_null(high)) { + (Some(true), _, _) | (_, Some(true), _) | (_, _, Some(true)) => { + Some(TriStateBool::Uncertain) + } + _ => None, + }, + Expr::Not(e) => match self.const_eval_predicate(e) { + Some(TriStateBool::True) => Some(TriStateBool::False), + Some(TriStateBool::False) => Some(TriStateBool::True), + Some(TriStateBool::Uncertain) => Some(TriStateBool::Uncertain), + None => None, + }, + Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { + Operator::And => { + match ( + self.const_eval_predicate(left), + self.const_eval_predicate(right), + ) { + (Some(TriStateBool::False), _) + | (_, Some(TriStateBool::False)) => Some(TriStateBool::False), + (Some(TriStateBool::True), Some(TriStateBool::True)) => { + Some(TriStateBool::True) + } + (Some(TriStateBool::Uncertain), Some(_)) + | (Some(_), Some(TriStateBool::Uncertain)) => { + Some(TriStateBool::Uncertain) + } + _ => None, + } + } + Operator::Or => { + match ( + self.const_eval_predicate(left), + self.const_eval_predicate(right), + ) { + (Some(TriStateBool::True), _) | (_, Some(TriStateBool::True)) => { + Some(TriStateBool::True) + } + (Some(TriStateBool::False), Some(TriStateBool::False)) => { + Some(TriStateBool::False) + } + (Some(TriStateBool::Uncertain), Some(_)) + | (Some(_), Some(TriStateBool::Uncertain)) => { + Some(TriStateBool::Uncertain) + } + _ => None, + } + } + _ => match (self.is_null(left), self.is_null(right)) { + (Some(true), _) | (_, Some(true)) => Some(TriStateBool::Uncertain), + _ => None, + }, + }, + e => match self.is_null(e) { + Some(true) => Some(TriStateBool::Uncertain), + _ => None, + }, + } } - match expr { - // Literal null is obviously null - Expr::Literal(ScalarValue::Null, _) => Some(true), - // We're assuming `value` is null - _ => match expr.nullable(input_schema) { - // If `expr` is not nullable, we can be certain `expr` is not null - Ok(false) => Some(false), - // Otherwise inconclusive - _ => None, + /// 
Determines if the given expression is null. + /// + /// This function returns: + /// - `Some(true)` is `expr` is certainly null + /// - `Some(false)` is `expr` can certainly not be null + /// - `None` if the result is inconclusive + fn is_null(&self, expr: &Expr) -> Option<bool> { + match expr { + // Literal null is obviously null + Expr::Literal(ScalarValue::Null, _) => Some(true), + Expr::Negative(e) => self.is_null(e), + Expr::Like(Like { expr, pattern, .. }) => { + match (self.is_null(expr), self.is_null(pattern)) { + (Some(true), _) | (_, Some(true)) => Some(true), + _ => None, + } + } + Expr::SimilarTo(Like { expr, pattern, .. }) => { + match (self.is_null(expr), self.is_null(pattern)) { + (Some(true), _) | (_, Some(true)) => Some(true), + _ => None, + } + } + Expr::Not(e) => self.is_null(e), + Expr::BinaryExpr(BinaryExpr { left, right, .. }) => { + match (self.is_null(left), self.is_null(right)) { + (Some(true), _) | (_, Some(true)) => Some(true), + _ => None, + } + } + Expr::Between(Between { + expr, low, high, ..
+ }) => match (self.is_null(expr), self.is_null(low), self.is_null(high)) { + (Some(true), _, _) | (_, Some(true), _) | (_, _, Some(true)) => { + Some(true) + } + _ => None, + }, + e => { + if e.eq(self.then_expr) { + // Evaluation occurs under the assumption that `then_expr` evaluates to null + Some(true) + } else { + match expr.nullable(self.input_schema) { + // If `expr` is not nullable, we can be certain `expr` is not null + Ok(false) => Some(false), + // Otherwise inconclusive + _ => None, + } + } + } } } } @@ -933,46 +1110,46 @@ mod tests { .with_nullable(false); // CASE WHEN x IS NOT NULL THEN x ELSE 0 - let e1 = when(col("x").is_not_null(), col("x")).otherwise(lit(0))?; - assert_not_nullable(&e1, &nullable_schema); - assert_not_nullable(&e1, &not_nullable_schema); + let e = when(col("x").is_not_null(), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN NOT x IS NULL THEN x ELSE 0 - let e2 = when(not(col("x").is_null()), col("x")).otherwise(lit(0))?; - assert_not_nullable(&e2, &nullable_schema); - assert_not_nullable(&e2, &not_nullable_schema); + let e = when(not(col("x").is_null()), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN X = 5 THEN x ELSE 0 - let e3 = when(col("x").eq(lit(5)), col("x")).otherwise(lit(0))?; - assert_nullable(&e3, &nullable_schema); - assert_not_nullable(&e3, &not_nullable_schema); + let e = when(col("x").eq(lit(5)), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN x IS NOT NULL AND x = 5 THEN x ELSE 0 - let e4 = when(and(col("x").is_not_null(), col("x").eq(lit(5))), col("x")) + let e = when(and(col("x").is_not_null(), col("x").eq(lit(5))), col("x")) .otherwise(lit(0))?; - assert_not_nullable(&e4, &nullable_schema); - assert_not_nullable(&e4, &not_nullable_schema); + assert_not_nullable(&e,
&nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN x = 5 AND x IS NOT NULL THEN x ELSE 0 - let e5 = when(and(col("x").eq(lit(5)), col("x").is_not_null()), col("x")) + let e = when(and(col("x").eq(lit(5)), col("x").is_not_null()), col("x")) .otherwise(lit(0))?; - assert_not_nullable(&e5, &nullable_schema); - assert_not_nullable(&e5, &not_nullable_schema); + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN x IS NOT NULL OR x = 5 THEN x ELSE 0 - let e6 = when(or(col("x").is_not_null(), col("x").eq(lit(5))), col("x")) + let e = when(or(col("x").is_not_null(), col("x").eq(lit(5))), col("x")) .otherwise(lit(0))?; - assert_nullable(&e6, &nullable_schema); - assert_not_nullable(&e6, &not_nullable_schema); + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN x = 5 OR x IS NOT NULL THEN x ELSE 0 - let e7 = when(or(col("x").eq(lit(5)), col("x").is_not_null()), col("x")) + let e = when(or(col("x").eq(lit(5)), col("x").is_not_null()), col("x")) .otherwise(lit(0))?; - assert_nullable(&e7, &nullable_schema); - assert_not_nullable(&e7, &not_nullable_schema); + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN (x = 5 AND x IS NOT NULL) OR (x = bar AND x IS NOT NULL) THEN x ELSE 0 - let e8 = when( + let e = when( or( and(col("x").eq(lit(5)), col("x").is_not_null()), and(col("x").eq(col("bar")), col("x").is_not_null()), @@ -980,14 +1157,49 @@ mod tests { col("x"), ) .otherwise(lit(0))?; - assert_not_nullable(&e8, &nullable_schema); - assert_not_nullable(&e8, &not_nullable_schema); + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); // CASE WHEN x = 5 OR x IS NULL THEN x ELSE 0 - let e9 = when(or(col("x").eq(lit(5)), col("x").is_null()), col("x")) + let e = when(or(col("x").eq(lit(5)), col("x").is_null()), col("x")) .otherwise(lit(0))?; - assert_nullable(&e9, &nullable_schema);
- assert_not_nullable(&e9, &not_nullable_schema); + assert_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x IS TRUE THEN x ELSE 0 + let e = when(col("x").is_true(), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x IS NOT TRUE THEN x ELSE 0 + let e = when(col("x").is_not_true(), col("x")).otherwise(lit(0))?; + assert_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x IS FALSE THEN x ELSE 0 + let e = when(col("x").is_false(), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x IS NOT FALSE THEN x ELSE 0 + let e = when(col("x").is_not_false(), col("x")).otherwise(lit(0))?; + assert_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x IS UNKNOWN THEN x ELSE 0 + let e = when(col("x").is_unknown(), col("x")).otherwise(lit(0))?; + assert_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x IS NOT UNKNOWN THEN x ELSE 0 + let e = when(col("x").is_not_unknown(), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN x LIKE 'x' THEN x ELSE 0 + let e = when(col("x").like(lit("x")), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); Ok(()) } From 7ff0810b1c2a4f40fb883232c60fefe84b299a96 Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Wed, 1 Oct 2025 11:45:36 +0200 Subject: [PATCH 6/6] #17838 Correctly report nullability of implicit casts in predicates --- datafusion/expr/src/expr_schema.rs | 33 ++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index
ec2344f962f2..9eeb2c723be3 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -679,12 +679,31 @@ impl ExprSchemable for Expr { } } +/// Represents the possible values for SQL's three valued logic. +/// `Option<bool>` is not used for this since `None` is used to represent +/// inconclusive answers already. enum TriStateBool { True, False, Uncertain, } +impl TryFrom<&ScalarValue> for TriStateBool { + type Error = DataFusionError; + + fn try_from(value: &ScalarValue) -> std::result::Result<Self, Self::Error> { + match value { + ScalarValue::Null => Ok(TriStateBool::Uncertain), + ScalarValue::Boolean(b) => Ok(match b { + None => TriStateBool::Uncertain, + Some(true) => TriStateBool::True, + Some(false) => TriStateBool::False, + }), + _ => Self::try_from(&value.cast_to(&DataType::Boolean)?) + } + } +} + struct WhenThenConstEvaluator<'a> { then_expr: &'a Expr, input_schema: &'a dyn ExprSchema, @@ -696,7 +715,7 @@ impl WhenThenConstEvaluator<'_> { fn const_eval_predicate(&self, predicate: &Expr) -> Option<TriStateBool> { match predicate { // Literal null is equivalent to boolean uncertain - Expr::Literal(ScalarValue::Null, _) => Some(TriStateBool::Uncertain), + Expr::Literal(scalar, _) => TriStateBool::try_from(scalar).ok(), Expr::IsNotNull(e) => { if let Ok(false) = e.nullable(self.input_schema) { // If `e` is not nullable, then `e IS NOT NULL` is always true @@ -845,7 +864,7 @@ impl WhenThenConstEvaluator<'_> { } } - /// Determines if the given expression is null. + /// Determines if the given expression evaluates to null.
/// /// This function returns: /// - `Some(true)` is `expr` is certainly null @@ -1201,6 +1220,16 @@ mod tests { assert_not_nullable(&e, &nullable_schema); assert_not_nullable(&e, &not_nullable_schema); + // CASE WHEN 0 THEN x ELSE 0 + let e = when(lit(0), col("x")).otherwise(lit(0))?; + assert_not_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + + // CASE WHEN 1 THEN x ELSE 0 + let e = when(lit(1), col("x")).otherwise(lit(0))?; + assert_nullable(&e, &nullable_schema); + assert_not_nullable(&e, &not_nullable_schema); + Ok(()) }