From fb9446bd383ff24b27c8e1478f016a138fe116f5 Mon Sep 17 00:00:00 2001 From: Manan Gupta <35839558+GuptaManan100@users.noreply.github.com> Date: Thu, 13 Jun 2024 14:21:27 +0530 Subject: [PATCH] Fix dual merging in outer join queries (#15959) Signed-off-by: Manan Gupta --- go/test/endtoend/vtgate/gen4/gen4_test.go | 31 +++ .../planbuilder/operators/join_merging.go | 25 ++- go/vt/vtgate/planbuilder/operators/joins.go | 2 +- .../planbuilder/testdata/select_cases.json | 206 ++++++++++++++++++ 4 files changed, 253 insertions(+), 11 deletions(-) diff --git a/go/test/endtoend/vtgate/gen4/gen4_test.go b/go/test/endtoend/vtgate/gen4/gen4_test.go index f284f85e883..a242ef0bb7a 100644 --- a/go/test/endtoend/vtgate/gen4/gen4_test.go +++ b/go/test/endtoend/vtgate/gen4/gen4_test.go @@ -489,3 +489,34 @@ func TestPercentageAndUnderscore(t *testing.T) { mcmp.Exec(`select a.tcol1 from t2 a join t2 b where a.tcol1 = b.tcol2 group by a.tcol1 having repeat(a.tcol1,min(a.id)) = "C_DC_D" order by a.tcol1`) mcmp.Exec(`select a.tcol1 from t2 a join t2 b where a.tcol1 = b.tcol2 group by a.tcol1 having repeat(a.tcol1,min(a.id)) = "C\_DC\_D" order by a.tcol1`) } + +// TestDualJoinQueries tests that queries having a join between a dual query and another query work as intended. +func TestDualJoinQueries(t *testing.T) { + mcmp, closer := start(t) + defer closer() + + mcmp.Exec(`insert into t2(id, tcol1, tcol2) values (1, 'ABC', 'ABC'),(2, 'DEF', 'DEF')`) + + // Left join with a dual table on left - merge-able + mcmp.Exec("select t.title, t2.id from (select 'ABC' as title) as t left join t2 on t2.id=1 and t.title = t2.tcol1") + mcmp.Exec("select t.title, t2.id from (select 'DEF' as title) as t left join t2 on t2.id=1 and t.title = t2.tcol1") + + // Left join with a dual table on left - non-merge-able + mcmp.Exec("select t.title, t2.id from (select 'ABC' as title) as t left join t2 on t2.id < 2 and t.title = t2.tcol1") + mcmp.Exec("select t.title, t2.id from (select 'DEF' as title) as t left join t2 on t2.id < 2 and t.title = t2.tcol1") + + // right join with a dual table on left + mcmp.Exec("select t.title, t2.id from (select 'ABC' as title) as t right join t2 on t.title = t2.tcol1") + + // Right join with a dual table on right - merge-able + mcmp.Exec("select t.title, t2.id from t2 right join (select 'ABC' as title) as t on t2.id=1 and t.title = t2.tcol1") + mcmp.Exec("select t.title, t2.id from t2 right join (select 'DEF' as title) as t on t2.id=1 and t.title = t2.tcol1") + + // Right join with a dual table on right - non-merge-able + mcmp.Exec("select t.title, t2.id from t2 right join (select 'ABC' as title) as t on t2.id < 2 and t.title = t2.tcol1") + mcmp.Exec("select t.title, t2.id from t2 right join (select 'DEF' as title) as t on t2.id < 2 and t.title = t2.tcol1") + + // left join with a dual table on right + mcmp.Exec("select t.title, t2.id from t2 left join (select 'ABC' as title) as t on t.title = t2.tcol1") + +} diff --git a/go/vt/vtgate/planbuilder/operators/join_merging.go b/go/vt/vtgate/planbuilder/operators/join_merging.go index 5edc812b1b7..8bba8ba57d9 100644 --- a/go/vt/vtgate/planbuilder/operators/join_merging.go +++ b/go/vt/vtgate/planbuilder/operators/join_merging.go @@ -27,17 +27,28 @@ import ( // mergeJoinInputs checks whether two operators can be merged into a single one. // If they can be merged, a new operator with the merged routing is returned // If they cannot be merged, nil is returned. -func mergeJoinInputs(ctx *plancontext.PlanningContext, lhs, rhs Operator, joinPredicates []sqlparser.Expr, m merger) *Route { +func mergeJoinInputs(ctx *plancontext.PlanningContext, lhs, rhs Operator, joinPredicates []sqlparser.Expr, m *joinMerger) *Route { lhsRoute, rhsRoute, routingA, routingB, a, b, sameKeyspace := prepareInputRoutes(lhs, rhs) if lhsRoute == nil { return nil } switch { - // if either side is a dual query, we can always merge them together case a == dual: - return m.merge(ctx, lhsRoute, rhsRoute, routingB) + // We clone the right hand side and try and push all the join predicates that are solved entirely by that side. + // If a dual is on the left side and it is a left join (all right joins are changed to left joins), then we can only merge if the right side is a single sharded routing. + rhsClone := Clone(rhs).(*Route) + for _, predicate := range joinPredicates { + if ctx.SemTable.DirectDeps(predicate).IsSolvedBy(TableID(rhsClone)) { + rhsClone.AddPredicate(ctx, predicate) + } + } + if !m.joinType.IsInner() && !rhsClone.Routing.OpCode().IsSingleShard() { + return nil + } + return m.merge(ctx, lhsRoute, rhsClone, rhsClone.Routing) case b == dual: + // If a dual is on the right side. return m.merge(ctx, lhsRoute, rhsRoute, routingA) // an unsharded/reference route can be merged with anything going to that keyspace @@ -74,12 +85,6 @@ func prepareInputRoutes(lhs Operator, rhs Operator) (*Route, *Route, Routing, Ro lhsRoute, rhsRoute, routingA, routingB, sameKeyspace := getRoutesOrAlternates(lhsRoute, rhsRoute) a, b := getRoutingType(routingA), getRoutingType(routingB) - if getTypeName(routingA) < getTypeName(routingB) { - // while deciding if two routes can be merged, the LHS/RHS order of the routes is not important. - // for the actual merging, we still need to remember which side was inner and which was outer for subqueries - a, b = b, a - routingA, routingB = routingB, routingA - } return lhsRoute, rhsRoute, routingA, routingB, a, b, sameKeyspace } @@ -178,7 +183,7 @@ func getRoutingType(r Routing) routingType { panic(fmt.Sprintf("switch should be exhaustive, got %T", r)) } -func newJoinMerge(predicates []sqlparser.Expr, joinType sqlparser.JoinType) merger { +func newJoinMerge(predicates []sqlparser.Expr, joinType sqlparser.JoinType) *joinMerger { return &joinMerger{ predicates: predicates, joinType: joinType, diff --git a/go/vt/vtgate/planbuilder/operators/joins.go b/go/vt/vtgate/planbuilder/operators/joins.go index d0d0fa770c8..86cdf06fe7e 100644 --- a/go/vt/vtgate/planbuilder/operators/joins.go +++ b/go/vt/vtgate/planbuilder/operators/joins.go @@ -44,7 +44,7 @@ func AddPredicate( joinPredicates bool, newFilter func(Operator, sqlparser.Expr) Operator, ) Operator { - deps := ctx.SemTable.RecursiveDeps(expr) + deps := ctx.SemTable.DirectDeps(expr) switch { case deps.IsSolvedBy(TableID(join.GetLHS())): // predicates can always safely be pushed down to the lhs if that is all they depend on diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.json b/go/vt/vtgate/planbuilder/testdata/select_cases.json index f94c75f084b..7cc1b58f7b0 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.json @@ -2072,6 +2072,212 @@ ] } }, + { + "comment": "left join with a dual table on left - merge-able", + "query": "select t.title, user.col from (select 'hello' as title) as t left join user on user.id=1", + "plan": { + "QueryType": "SELECT", + "Original": "select t.title, user.col from (select 'hello' as title) as t left join user on user.id=1", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select t.title, `user`.col from (select 'hello' as title from dual where 1 != 1) as t left join `user` on `user`.id = 1 where 1 != 1", + "Query": "select t.title, `user`.col from (select 'hello' as title from dual) as t left join `user` on `user`.id = 1", + "Table": "`user`, dual", + "Values": [ + "1" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "main.dual", + "user.user" + ] + } + }, + { + "comment": "left join with a dual table on left - non-merge-able", + "query": "select t.title, user.col from (select 'hello' as title) as t left join user on user.id <= 4", + "plan": { + "QueryType": "SELECT", + "Original": "select t.title, user.col from (select 'hello' as title) as t left join user on user.id <= 4", + "Instructions": { + "OperatorType": "Join", + "Variant": "LeftJoin", + "JoinColumnIndexes": "L:0,R:0", + "TableName": "dual_`user`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Reference", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select t.title from (select 'hello' as title from dual where 1 != 1) as t where 1 != 1", + "Query": "select t.title from (select 'hello' as title from dual) as t", + "Table": "dual" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.col from `user` where 1 != 1", + "Query": "select `user`.col from `user` where `user`.id <= 4", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "main.dual", + "user.user" + ] + } + }, + { + "comment": "left join with dual non-merge-able with predicate with cross dependencies", + "query": "select t.title, user.col from (select 'hello' as title) as t left join user on user.id <= 4 and t.title = user.col", + "plan": { + "QueryType": "SELECT", + "Original": "select t.title, user.col from (select 'hello' as title) as t left join user on user.id <= 4 and t.title = user.col", + "Instructions": { + "OperatorType": "Join", + "Variant": "LeftJoin", + "JoinColumnIndexes": "L:0,R:0", + "JoinVars": { + "t_title": 0 + }, + "TableName": "dual_`user`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Reference", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select t.title from (select 'hello' as title from dual where 1 != 1) as t where 1 != 1", + "Query": "select t.title from (select 'hello' as title from dual) as t", + "Table": "dual" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.col from `user` where 1 != 1", + "Query": "select `user`.col from `user` where `user`.col = :t_title and `user`.id <= 4", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "main.dual", + "user.user" + ] + } + }, + { + "comment": "right join with a dual table on left", + "query": "select t.title, user.col from (select 'hello' as title) as t right join user on user.id<=4", + "plan": { + "QueryType": "SELECT", + "Original": "select t.title, user.col from (select 'hello' as title) as t right join user on user.id<=4", + "Instructions": { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select t.title, `user`.col from `user` left join (select 'hello' as title from dual where 1 != 1) as t on `user`.id <= 4 where 1 != 1", + "Query": "select t.title, `user`.col from `user` left join (select 'hello' as title from dual) as t on `user`.id <= 4", + "Table": "`user`, dual" + }, + "TablesUsed": [ + "main.dual", + "user.user" + ] + } + }, + { + "comment": "right join with a dual table on right - merge-able", + "query": "select t.title, user.col from user right join (select 'hello' as title) as t on user.id=1", + "plan": { + "QueryType": "SELECT", + "Original": "select t.title, user.col from user right join (select 'hello' as title) as t on user.id=1", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select t.title, `user`.col from (select 'hello' as title from dual where 1 != 1) as t left join `user` on `user`.id = 1 where 1 != 1", + "Query": "select t.title, `user`.col from (select 'hello' as title from dual) as t left join `user` on `user`.id = 1", + "Table": "`user`, dual", + "Values": [ + "1" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "main.dual", + "user.user" + ] + } + }, + { + "comment": "right join with a dual table on right - non-merge-able", + "query": "select t.title, user.col from user right join (select 'hello' as title) as t on user.id>=4", + "plan": { + "QueryType": "SELECT", + "Original": "select t.title, user.col from user right join (select 'hello' as title) as t on user.id>=4", + "Instructions": { + "OperatorType": "Join", + "Variant": "LeftJoin", + "JoinColumnIndexes": "L:0,R:0", + "TableName": "dual_`user`", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Reference", + "Keyspace": { + "Name": "main", + "Sharded": false + }, + "FieldQuery": "select t.title from (select 'hello' as title from dual where 1 != 1) as t where 1 != 1", + "Query": "select t.title from (select 'hello' as title from dual) as t", + "Table": "dual" + }, + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `user`.col from `user` where 1 != 1", + "Query": "select `user`.col from `user` where `user`.id >= 4", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "main.dual", + "user.user" + ] + } + }, { "comment": "Union after into outfile is incorrect", "query": "select id from user into outfile 'out_file_name' union all select id from music",