From f347fba2d25d62c98fe3bb5f28bc9b508f7ca017 Mon Sep 17 00:00:00 2001 From: nobigo Date: Tue, 29 Oct 2024 18:52:20 +0800 Subject: [PATCH] [CALCITE-6650] Optimize the IN sub-query and SOME sub-query by Metadata RowCount --- .../calcite/rel/rules/SubQueryRemoveRule.java | 18 +- .../apache/calcite/test/RelOptRulesTest.java | 46 +++++ .../apache/calcite/test/RelOptRulesTest.xml | 144 ++++++++++++++++ core/src/test/resources/sql/sub-query.iq | 163 +++++++++++++++++- 4 files changed, 368 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java index 6feee83c76a..3e9473f0b60 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java @@ -170,6 +170,13 @@ private static RexNode rewriteCollection(RexSubQuery e, */ private static RexNode rewriteSome(RexSubQuery e, Set variablesSet, RelBuilder builder, int subQueryIndex) { + // If the sub-query is guaranteed to return 0 row, just return + // FALSE. + final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery(); + final Double maxRowCount = mq.getMaxRowCount(e.rel); + if (maxRowCount != null && maxRowCount <= 0D) { + return builder.getRexBuilder().makeLiteral(Boolean.FALSE, e.getType(), true); + } // Most general case, where the left and right keys might have nulls, and // caller requires 3-valued logic return. // @@ -459,11 +466,11 @@ private static RexNode rewriteExists(RexSubQuery e, Set variables // TRUE. final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery(); final Double minRowCount = mq.getMinRowCount(e.rel); - if (minRowCount != null && minRowCount >= 1D) { + if (minRowCount != null && minRowCount > 0D) { return builder.literal(true); } final Double maxRowCount = mq.getMaxRowCount(e.rel); - if (maxRowCount != null && maxRowCount < 1D) { + if (maxRowCount != null && maxRowCount <= 0D) { return builder.literal(false); } builder.push(e.rel); @@ -555,6 +562,13 @@ private static RexNode rewriteUnique(RexSubQuery e, RelBuilder builder) { */ private static RexNode rewriteIn(RexSubQuery e, Set variablesSet, RelOptUtil.Logic logic, RelBuilder builder, int offset, int subQueryIndex) { + // If the sub-query is guaranteed to return 0 row, just return + // FALSE. + final RelMetadataQuery mq = e.rel.getCluster().getMetadataQuery(); + final Double maxRowCount = mq.getMaxRowCount(e.rel); + if (maxRowCount != null && maxRowCount <= 0D) { + return builder.getRexBuilder().makeLiteral(Boolean.FALSE, e.getType(), true); + } // Most general case, where the left and right keys might have nulls, and // caller requires 3-valued logic return. // diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index 0454021eeea..5fe12d9617e 100644 --- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -5049,6 +5049,52 @@ private void checkEmptyJoin(RelOptFixture f) { sql(sql).withSubQueryRules().check(); } + /** Test case for + * [CALCITE-6650] + * Optimize the IN sub-query and SOME sub-query + * by Metadata RowCount. */ + @Test void testInWithNoRowSubQuery() { + final String sql = "select * from dept as d\n" + + "where deptno in (\n" + + " select deptno from emp e where false)"; + sql(sql).withSubQueryRules().check(); + } + + @Test void testNotInWithNoRowSubQuery() { + final String sql = "select * from dept as d\n" + + "where deptno not in (\n" + + " select deptno from emp e where false)"; + sql(sql).withSubQueryRules().check(); + } + + @Test void testSomeWithGreaterThanNoRowSubQuery() { + final String sql = "select * from dept as d\n" + + "where deptno > some(\n" + + " select deptno from emp e where false)"; + sql(sql).withSubQueryRules().check(); + } + + @Test void testSomeWithLessThanOrEqualNoRowSubQuery() { + final String sql = "select * from dept as d\n" + + "where deptno <= some(\n" + + " select deptno from emp e where false)"; + sql(sql).withSubQueryRules().check(); + } + + @Test void testUniqueWithNoRowSubQuery() { + final String sql = "select * from dept as d\n" + + "where unique(\n" + + " select deptno from emp e where false)"; + sql(sql).withSubQueryRules().check(); + } + + @Test void testNotUniqueWithNoRowSubQuery() { + final String sql = "select * from dept as d\n" + + "where not unique(\n" + + " select deptno from emp e where false)"; + sql(sql).withSubQueryRules().check(); + } + /** Test case for * [CALCITE-4848] * Adding a HAVING condition to a query with a dynamic parameter makes the result always empty diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index dacda972b16..eba397a3b9a 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -5081,6 +5081,30 @@ LogicalUnion(all=[true]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) LogicalProject(EXPR$0=[LOWER($1)]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + + + + + + + + + + + @@ -7724,6 +7748,30 @@ LogicalProject(USER=[USER]) LogicalAggregate(group=[{0}], EXPR$1=[SUM($1)]) LogicalProject(NAME=[$1], DEPTNO=[$0]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + + + + + + + @@ -7762,6 +7810,30 @@ LogicalProject(NAME=[$1], DEPTNO=[$0], EXPR$2=[$2]) LogicalFilter(condition=[=($0, 0)]) LogicalWindow(window#0=[window(partition {1} aggs [COUNT()])]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + + + + + + + @@ -14508,6 +14580,54 @@ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$ LogicalAggregate(group=[{0}]) LogicalProject(NAME=[$1]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + some( + select deptno from emp e where false)]]> + + + SOME($0, { +LogicalProject(DEPTNO=[$7]) + LogicalFilter(condition=[false]) + LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +})]) + LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + + + + + + + + + + @@ -16888,6 +17008,30 @@ LogicalUnion(all=[true]) + + + + + + + + + + + diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq index 382ad91ebd1..f1adf623ad9 100644 --- a/core/src/test/resources/sql/sub-query.iq +++ b/core/src/test/resources/sql/sub-query.iq @@ -163,7 +163,10 @@ select * from dept where deptno not in (select deptno from emp where false); (4 rows) !ok -select deptno, deptno in (select deptno from emp where false) from dept; +EnumerableValues(tuples=[[{ 10, 'Sales ' }, { 20, 'Marketing ' }, { 30, 'Engineering' }, { 40, 'Empty ' }]]) +!plan + +select deptno, deptno in (select deptno from emp where false) from dept; +--------+--------+ | DEPTNO | EXPR$1 | +--------+--------+ @@ -175,6 +178,10 @@ select deptno, deptno in (select deptno from emp where false) from dept; (4 rows) !ok +EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false], expr#3=[CAST($t2):BOOLEAN], DEPTNO=[$t0], EXPR$1=[$t3]) + EnumerableValues(tuples=[[{ 10, 'Sales ' }, { 20, 'Marketing ' }, { 30, 'Engineering' }, { 40, 'Empty ' }]]) +!plan + select deptno, deptno not in (select deptno from emp where false) from dept; +--------+--------+ | DEPTNO | EXPR$1 | @@ -4238,4 +4245,158 @@ EnumerableCalc(expr#0..8=[{inputs}], proj#0..7=[{exprs}]) EnumerableTableScan(table=[[scott, EMP]]) !plan + +# [CALCITE-6650] Optimize the IN sub-query and SOME sub-query by Metadata RowCount + +!use post + +# Test case about SOME sub-query when sub-query return 0 row +select * from emp where deptno > some(select deptno from dept where false); ++-------+--------+--------+ +| ENAME | DEPTNO | GENDER | ++-------+--------+--------+ ++-------+--------+--------+ +(0 rows) + +!ok + +EnumerableValues(tuples=[[]]) +!plan + +# Same as previous; but is Scalar sub-query +select deptno, deptno > some(select deptno from dept where false) from emp; ++--------+--------+ +| DEPTNO | EXPR$1 | ++--------+--------+ +| 10 | false | +| 20 | false | +| 30 | false | +| 10 | false | +| 30 | false | +| 50 | false | +| 50 | false | +| 60 | false | +| | false | ++--------+--------+ +(9 rows) + +!ok + +EnumerableCalc(expr#0=[{inputs}], expr#1=[false], expr#2=[CAST($t1):BOOLEAN], DEPTNO=[$t0], EXPR$1=[$t2]) + EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, { 50 }, { 60 }, { null }]]) +!plan + +# Test case about ANY sub-query when sub-query return 0 row +select * from emp where deptno > any(select deptno from dept where false); ++-------+--------+--------+ +| ENAME | DEPTNO | GENDER | ++-------+--------+--------+ ++-------+--------+--------+ +(0 rows) + +!ok + +EnumerableValues(tuples=[[]]) +!plan + +# Same as previous; but is Scalar sub-query +select deptno, deptno > any(select deptno from dept where false) from emp; ++--------+--------+ +| DEPTNO | EXPR$1 | ++--------+--------+ +| 10 | false | +| 10 | false | +| 20 | false | +| 30 | false | +| 30 | false | +| 50 | false | +| 50 | false | +| 60 | false | +| | false | ++--------+--------+ +(9 rows) + +!ok + +EnumerableCalc(expr#0=[{inputs}], expr#1=[false], expr#2=[CAST($t1):BOOLEAN], DEPTNO=[$t0], EXPR$1=[$t2]) + EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, { 50 }, { 60 }, { null }]]) +!plan + +# Test case about UNIQUE sub-query when sub-query return 0 row +select * from emp where unique (select deptno from dept where false); ++-------+--------+--------+ +| ENAME | DEPTNO | GENDER | ++-------+--------+--------+ +| Adam | 50 | M | +| Alice | 30 | F | +| Bob | 10 | M | +| Eric | 20 | M | +| Eve | 50 | F | +| Grace | 60 | F | +| Jane | 10 | F | +| Susan | 30 | F | +| Wilma | | F | ++-------+--------+--------+ +(9 rows) + +!ok + +EnumerableValues(tuples=[[{ 'Jane ', 10, 'F' }, { 'Bob ', 10, 'M' }, { 'Eric ', 20, 'M' }, { 'Susan', 30, 'F' }, { 'Alice', 30, 'F' }, { 'Adam ', 50, 'M' }, { 'Eve ', 50, 'F' }, { 'Grace', 60, 'F' }, { 'Wilma', null, 'F' }]]) +!plan + +# Same as previous; but is Scalar sub-query +select unique (select deptno from dept where false) from emp; ++--------+ +| EXPR$0 | ++--------+ +| true | +| true | +| true | +| true | +| true | +| true | +| true | +| true | +| true | ++--------+ +(9 rows) + +!ok + +EnumerableCalc(expr#0=[{inputs}], expr#1=[true], EXPR$0=[$t1]) + EnumerableValues(tuples=[[{ 10 }, { 10 }, { 20 }, { 30 }, { 30 }, { 50 }, { 50 }, { 60 }, { null }]]) +!plan + + +# Test case about NOT UNIQUE sub-query when sub-query return 0 row +select * from emp where not unique (select deptno from dept where false); ++-------+--------+--------+ +| ENAME | DEPTNO | GENDER | ++-------+--------+--------+ ++-------+--------+--------+ +(0 rows) + +!ok + +EnumerableValues(tuples=[[]]) +!plan + +# Same as previous; but is Scalar sub-query +select not unique (select deptno from dept where false) from dept; ++--------+ +| EXPR$0 | ++--------+ +| false | +| false | +| false | +| false | ++--------+ +(4 rows) + +!ok + +EnumerableCalc(expr#0..1=[{inputs}], expr#2=[false], EXPR$0=[$t2]) + EnumerableValues(tuples=[[{ 10, 'Sales ' }, { 20, 'Marketing ' }, { 30, 'Engineering' }, { 40, 'Empty ' }]]) +!plan + # End sub-query.iq