Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(query): null first behavior #16475

Merged
merged 5 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions src/query/ast/src/parser/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,6 @@ impl Dialect {
}
}

pub fn is_null_biggest(&self) -> bool {
match self {
Dialect::MySQL => false,
Dialect::Hive => false,
Dialect::Experimental | Dialect::PostgreSQL | Dialect::PRQL => true,
}
}

pub fn substr_index_zero_literal_as_one(&self) -> bool {
match self {
Dialect::MySQL => false,
Expand Down
9 changes: 9 additions & 0 deletions src/query/settings/src/settings_default.rs
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,15 @@ impl DefaultSettings {
mode: SettingMode::Both,
range: Some(SettingRange::Numeric(0..=u64::MAX)),
}),
("default_order_by_null", DefaultSettingValue {
value: UserSettingValue::String("nulls_last".to_string()),
desc: "Set numeric default_order_by_null mode",
mode: SettingMode::Both,
range: Some(SettingRange::String(vec![
"nulls_first".into(), "nulls_last".into(),
"nulls_first_on_asc_last_on_desc".into(), "nulls_last_on_asc_first_on_desc".into(),
])),
}),
("ddl_column_type_nullable", DefaultSettingValue {
value: UserSettingValue::UInt64(1),
desc: "Sets new columns to be nullable (1) or not (0) by default in table operations.",
Expand Down
15 changes: 15 additions & 0 deletions src/query/settings/src/settings_getter_setter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,21 @@ impl Settings {
self.try_get_string("numeric_cast_option")
}

pub fn get_nulls_first(&self) -> impl Fn(bool) -> bool {
match self
.try_get_string("default_order_by_null")
.unwrap_or("nulls_last".to_string())
.to_ascii_lowercase()
.as_str()
{
"nulls_last" => |_| false,
"nulls_first" => |_| true,
"nulls_first_on_asc_last_on_desc" => |asc: bool| asc,
"nulls_last_on_asc_first_on_desc" => |asc: bool| !asc,
_ => |_| false,
}
}

pub fn get_external_server_connect_timeout_secs(&self) -> Result<u64> {
self.try_get_u64("external_server_connect_timeout_secs")
}
Expand Down
15 changes: 10 additions & 5 deletions src/query/sql/src/executor/physical_plans/physical_window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,19 @@ impl PhysicalPlanBuilder {
}
}

let default_nulls_first = self.ctx.get_settings().get_nulls_first();

let order_by_items = w
.order_by
.iter()
.map(|v| SortDesc {
asc: v.asc.unwrap_or(true),
nulls_first: v.nulls_first.unwrap_or(false),
order_by: v.order_by_item.index,
display_name: self.metadata.read().column(v.order_by_item.index).name(),
.map(|v| {
let asc = v.asc.unwrap_or(true);
SortDesc {
asc,
nulls_first: v.nulls_first.unwrap_or_else(|| default_nulls_first(asc)),
order_by: v.order_by_item.index,
display_name: self.metadata.read().column(v.order_by_item.index).name(),
}
})
.collect::<Vec<_>>();
let partition_items = w.partition_by.iter().map(|v| v.index).collect::<Vec<_>>();
Expand Down
10 changes: 8 additions & 2 deletions src/query/sql/src/planner/binder/bind_query/bind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,22 @@ impl Binder {
self.ctes_map.clone(),
);
let mut order_by_items = Vec::with_capacity(query.order_by.len());

let default_nulls_first = self.ctx.get_settings().get_nulls_first();

for order in query.order_by.iter() {
match order.expr {
Expr::ColumnRef { .. } => {
let scalar = scalar_binder.bind(&order.expr)?.0;
match scalar {
ScalarExpr::BoundColumnRef(BoundColumnRef { column, .. }) => {
let asc = order.asc.unwrap_or(true);
let order_by_item = SortItem {
index: column.index,
asc: order.asc.unwrap_or(true),
nulls_first: order.nulls_first.unwrap_or(false),
asc,
nulls_first: order
.nulls_first
.unwrap_or_else(|| default_nulls_first(asc)),
};
order_by_items.push(order_by_item);
}
Expand Down
31 changes: 17 additions & 14 deletions src/query/sql/src/planner/binder/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,7 @@ impl Binder {
distinct: bool,
) -> Result<OrderItems> {
bind_context.set_expr_context(ExprContext::OrderByClause);
// null is the largest value in databend, smallest in hive
// TODO: rewrite after https://github.com/jorgecarleitao/arrow2/pull/1286 is merged
let default_nulls_first = !self
.ctx
.get_settings()
.get_sql_dialect()
.unwrap()
.is_null_biggest();
let default_nulls_first = self.ctx.get_settings().get_nulls_first();

let mut order_items = Vec::with_capacity(order_by.len());
for order in order_by {
Expand All @@ -94,11 +87,15 @@ impl Binder {

let index = index - 1;
let projection = &projections[index];

let asc = order.asc.unwrap_or(true);
order_items.push(OrderItem {
index: projection.index,
name: projection.column_name.clone(),
asc: order.asc.unwrap_or(true),
nulls_first: order.nulls_first.unwrap_or(default_nulls_first),
asc,
nulls_first: order
.nulls_first
.unwrap_or_else(|| default_nulls_first(asc)),
});
}
_ => {
Expand All @@ -119,11 +116,14 @@ impl Binder {
.find(|(_, (_, scalar))| bound_expr.eq(scalar))
{
// The order by expression is in the select list.
let asc = order.asc.unwrap_or(true);
order_items.push(OrderItem {
index: projections[idx].index,
name: alias.clone(),
asc: order.asc.unwrap_or(true),
nulls_first: order.nulls_first.unwrap_or(default_nulls_first),
asc,
nulls_first: order
.nulls_first
.unwrap_or_else(|| default_nulls_first(asc)),
});
} else if distinct {
return Err(ErrorCode::SemanticError(
Expand Down Expand Up @@ -165,11 +165,14 @@ impl Binder {
index: column_binding.index,
};
scalar_items.insert(column_binding.index, item);
let asc = order.asc.unwrap_or(true);
order_items.push(OrderItem {
index: column_binding.index,
name: column_binding.column_name,
asc: order.asc.unwrap_or(true),
nulls_first: order.nulls_first.unwrap_or(default_nulls_first),
asc,
nulls_first: order
.nulls_first
.unwrap_or_else(|| default_nulls_first(asc)),
});
}
}
Expand Down
16 changes: 7 additions & 9 deletions src/query/sql/src/planner/binder/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,29 +89,27 @@ impl Binder {
child
};

let default_nulls_first = !self
.ctx
.get_settings()
.get_sql_dialect()
.unwrap()
.is_null_biggest();
let default_nulls_first = self.ctx.get_settings().get_nulls_first();

let mut sort_items: Vec<SortItem> = vec![];
if !window_plan.partition_by.is_empty() {
for part in window_plan.partition_by.iter() {
sort_items.push(SortItem {
index: part.index,
asc: true,
nulls_first: default_nulls_first,
nulls_first: default_nulls_first(true),
});
}
}

for order in window_plan.order_by.iter() {
let asc = order.asc.unwrap_or(true);
sort_items.push(SortItem {
index: order.order_by_item.index,
asc: order.asc.unwrap_or(true),
nulls_first: order.nulls_first.unwrap_or(default_nulls_first),
asc,
nulls_first: order
.nulls_first
.unwrap_or_else(|| default_nulls_first(asc)),
});
}

Expand Down
12 changes: 9 additions & 3 deletions src/query/sql/src/planner/semantic/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3261,7 +3261,7 @@ impl<'a> TypeChecker<'a> {
return None;
}
let mut asc = true;
let mut nulls_first = true;
let mut nulls_first = None;
if args.len() >= 2 {
let box (arg, _) = self.resolve(args[1]).ok()?;
if let Ok(arg) = ConstantExpr::try_from(arg) {
Expand Down Expand Up @@ -3291,9 +3291,9 @@ impl<'a> TypeChecker<'a> {
if let Ok(arg) = ConstantExpr::try_from(arg) {
if let Scalar::String(nulls_order) = arg.value {
if nulls_order.eq_ignore_ascii_case("nulls first") {
nulls_first = true;
nulls_first = Some(true);
} else if nulls_order.eq_ignore_ascii_case("nulls last") {
nulls_first = false;
nulls_first = Some(false);
} else {
return Some(Err(ErrorCode::SemanticError(
"Null sorting order must be either NULLS FIRST or NULLS LAST",
Expand All @@ -3310,6 +3310,12 @@ impl<'a> TypeChecker<'a> {
)));
}
}

let nulls_first = nulls_first.unwrap_or_else(|| {
let default_nulls_first = self.ctx.get_settings().get_nulls_first();
default_nulls_first(asc)
});

let func_name = match (asc, nulls_first) {
(true, true) => "array_sort_asc_null_first",
(false, true) => "array_sort_desc_null_first",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option<
Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(max), *size)),
Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(min), *size)),
),
_ => (Scalar::Null, Scalar::Null),
_ => return None,
}
}
Statistics::Int64(s) => {
Expand Down Expand Up @@ -92,7 +92,7 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option<
Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(max), *size)),
Scalar::Decimal(DecimalScalar::Decimal256(I256::from_i128(min), *size)),
),
_ => (Scalar::Null, Scalar::Null),
_ => return None,
}
}
Statistics::Int96(s) => {
Expand Down Expand Up @@ -124,12 +124,12 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option<
decode_decimal256_from_bytes(max, *size),
decode_decimal256_from_bytes(min, *size),
),
_ => (Scalar::Null, Scalar::Null),
_ => return None,
}
}
}
} else {
(Scalar::Null, Scalar::Null)
return None;
};
Some(ColumnStatistics::new(
min,
Expand Down
Binary file added tests/data/parquet/no-stats.parquet
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/sqllogictests/suites/query/order.test
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ NULL
1
2

query I
select * from order_test order by a nulls last
----
1
2
NULL

query II
select number d , max(1-number) c from numbers(4) group by 1 order by 2;
----
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,19 @@ select id from view_parquet where tinyint_col = 0 order by timestamp_col limit 5
2
4
6

query
select line_item_usage_start_date from @data/parquet/no-stats.parquet (pattern => '') order by line_item_usage_start_date limit 2
----
2024-09-11 06:00:00.000000
2024-09-11 06:00:00.000000

query I
select count() from @data/parquet/no-stats.parquet (pattern => '') where line_item_usage_start_date >= '2024-09-11 06:00:00'
----
25825

query I
select count() from @data/parquet/no-stats.parquet (pattern => '') where line_item_usage_start_date < '2024-09-11 06:00:00'
----
0
Loading