Skip to content

Commit

Permalink
Merge pull request #49 from cmu-db/logical_optimizer_rules
Browse files Browse the repository at this point in the history
feat: [Logical Optimizer]Eliminate Join Rule
  • Loading branch information
AveryQi115 authored Feb 11, 2024
2 parents d36dfd5 + 08791ad commit e3c8f2f
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 45 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions optd-datafusion-bridge/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
arrow-schema = "*"
datafusion = "32.0.0"
datafusion-expr = "32.0.0"
async-trait = "0.1"
optd-core = { path = "../optd-core" }
optd-datafusion-repr = { path = "../optd-datafusion-repr" }
Expand Down
36 changes: 29 additions & 7 deletions optd-datafusion-bridge/src/into_optd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use datafusion::{
logical_expr::{self, logical_plan, LogicalPlan, Operator},
scalar::ScalarValue,
};
use datafusion_expr::Expr as DFExpr;
use optd_core::rel_node::RelNode;
use optd_datafusion_repr::plan_nodes::{
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, Expr, ExprList, FuncExpr, FuncType,
Expand Down Expand Up @@ -117,7 +118,7 @@ impl OptdPlanContext<'_> {
expr,
)
.into_expr())
}
}
_ => bail!("Unsupported expression: {:?}", expr),
}
}
Expand Down Expand Up @@ -215,12 +216,33 @@ impl OptdPlanContext<'_> {
}

if log_ops.is_empty() {
Ok(LogicalJoin::new(
left,
right,
ConstantExpr::bool(true).into_expr(),
join_type,
))
// optd currently supports only two kinds of join condition:
// 1. a normal equality condition, e.g.
//    select * from a join b on a.id = b.id
// 2. a boolean literal, e.g.
//    select * from a join b on false/true
// Any other literal or filter is not supported; instead of silently
// converting it to a join on true, we bail out.

match node.filter {
Some(DFExpr::Literal(ScalarValue::Boolean(Some(val)))) => {
return Ok(LogicalJoin::new(
left,
right,
ConstantExpr::bool(val).into_expr(),
join_type,
));
}
None => {
return Ok(LogicalJoin::new(
left,
right,
ConstantExpr::bool(true).into_expr(),
join_type,
));
}
_ => bail!("unsupported join filter: {:?}", node.filter),
}
} else if log_ops.len() == 1 {
Ok(LogicalJoin::new(left, right, log_ops.remove(0), join_type))
} else {
Expand Down
5 changes: 4 additions & 1 deletion optd-datafusion-repr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use optd_core::cascades::{CascadesOptimizer, GroupId, OptimizerProperties};
use plan_nodes::{OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode};
use properties::schema::{Catalog, SchemaPropertyBuilder};
use rules::{
HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule, ProjectionPullUpJoin,
EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, PhysicalConversionRule,
ProjectionPullUpJoin,
};

pub use adaptive::PhysicalCollector;
Expand Down Expand Up @@ -46,6 +47,8 @@ impl DatafusionOptimizer {
rules.push(Arc::new(JoinCommuteRule::new()));
rules.push(Arc::new(JoinAssocRule::new()));
rules.push(Arc::new(ProjectionPullUpJoin::new()));
rules.push(Arc::new(EliminateJoinRule::new()));

let cost_model = AdaptiveCostModel::new(50);
Self {
runtime_statistics: cost_model.get_runtime_map(),
Expand Down
4 changes: 3 additions & 1 deletion optd-datafusion-repr/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,7 @@ mod macros;
mod physical;

// pub use filter_join::FilterJoinPullUpRule;
pub use joins::{HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin};
pub use joins::{
EliminateJoinRule, HashJoinRule, JoinAssocRule, JoinCommuteRule, ProjectionPullUpJoin,
};
pub use physical::PhysicalConversionRule;
44 changes: 42 additions & 2 deletions optd-datafusion-repr/src/rules/joins.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::vec;

use itertools::Itertools;
use optd_core::optimizer::Optimizer;
Expand All @@ -8,8 +9,9 @@ use optd_core::rules::{Rule, RuleMatcher};

use super::macros::{define_impl_rule, define_rule};
use crate::plan_nodes::{
BinOpExpr, BinOpType, ColumnRefExpr, Expr, ExprList, JoinType, LogicalJoin, LogicalProjection,
OptRelNode, OptRelNodeTyp, PhysicalHashJoin, PlanNode,
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, Expr, ExprList, JoinType,
LogicalEmptyRelation, LogicalJoin, LogicalProjection, OptRelNode, OptRelNodeTyp,
PhysicalHashJoin, PlanNode,
};
use crate::properties::schema::SchemaPropertyBuilder;

Expand Down Expand Up @@ -78,6 +80,44 @@ fn apply_join_commute(
vec![node.as_ref().clone()]
}

// Inner join on a constant boolean condition -> cross join (true) or empty relation (false).
// The picks `left`, `right` and `cond` are handed to `apply_eliminate_join`.
define_rule!(
EliminateJoinRule,
apply_eliminate_join,
(Join(JoinType::Inner), left, right, [cond])
);

/// Rewrites an inner join whose condition is a constant boolean.
///
/// * `true`  -> the join is re-emitted as a `JoinType::Cross` join over the same
///   two child groups, keeping a constant `true` condition.
/// * `false` -> the join is replaced by `LogicalEmptyRelation::new(false)`.
///
/// Returns an empty vector (no alternative produced) when the condition is not
/// a boolean constant, or when the constant node carries no data.
///
/// The optimizer handle is not needed by this rule; it is accepted only to
/// match the rule-function signature and is therefore underscore-prefixed.
fn apply_eliminate_join(
    _optimizer: &impl Optimizer<OptRelNodeTyp>,
    EliminateJoinRulePicks { left, right, cond }: EliminateJoinRulePicks,
) -> Vec<RelNode<OptRelNodeTyp>> {
    // Guard: this rule only fires for constant boolean predicates.
    if !matches!(cond.typ, OptRelNodeTyp::Constant(ConstantType::Bool)) {
        return vec![];
    }

    match cond.data {
        Some(data) => {
            if data.as_bool() {
                // The filter is always true: turn the join into a cross join.
                let node = LogicalJoin::new(
                    PlanNode::from_group(left.into()),
                    PlanNode::from_group(right.into()),
                    ConstantExpr::bool(true).into_expr(),
                    JoinType::Cross,
                );
                vec![node.into_rel_node().as_ref().clone()]
            } else {
                // The filter is always false: the join produces no rows.
                // No need to handle schema here, as all exprs in the same group
                // will have same logical properties.
                let node = LogicalEmptyRelation::new(false);
                vec![node.into_rel_node().as_ref().clone()]
            }
        }
        // A constant node without a value gives us nothing to decide on.
        None => vec![],
    }
}

// (A join B) join C -> A join (B join C)
define_rule!(
JoinAssocRule,
Expand Down
35 changes: 2 additions & 33 deletions optd-sqlplannertest/tests/empty_relation.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,44 +33,13 @@ PhysicalProjection
-- Test whether the optimizer eliminates join to empty relation
select * from t1 inner join t2 on false;
select 64+1 from t1 inner join t2 on false;
select 64+1 from t1 inner join t2 on 1=0;

/*
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
└── LogicalJoin { join_type: Inner, cond: true }
└── LogicalJoin { join_type: Inner, cond: false }
├── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #1, #2, #3 ] }
└── PhysicalProjection { exprs: [ #2, #3, #0, #1 ] }
└── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
├── PhysicalScan { table: t2 }
└── PhysicalScan { table: t1 }
0 0 0 200
0 0 1 201
0 0 2 202
1 1 0 200
1 1 1 201
1 1 2 202
2 2 0 200
2 2 1 201
2 2 2 202
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
└── PhysicalEmptyRelation { produce_one_row: false }
*/

1 change: 0 additions & 1 deletion optd-sqlplannertest/tests/empty_relation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
- sql: |
select * from t1 inner join t2 on false;
select 64+1 from t1 inner join t2 on false;
select 64+1 from t1 inner join t2 on 1=0;
desc: Test whether the optimizer eliminates join to empty relation
tasks:
- explain:logical_optd,physical_optd
Expand Down

0 comments on commit e3c8f2f

Please sign in to comment.