Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complex Expression handling for uncorrelated IN and NOT IN subqueries #16439

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions go/test/endtoend/vtgate/vitess_tester/subquery/subquery.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Fixture for the uncorrelated IN / NOT IN subquery tests below.
# t1 is the outer table that every test query filters and counts.
create table t1
(
id1 bigint,
id2 bigint,
primary key (id1)
) Engine = InnoDB;

# t2 is the table read by the IN / NOT IN subqueries.
create table t2
(
id3 bigint,
id4 bigint,
primary key (id3)
) Engine = InnoDB;

# t1 holds five rows with id1 = id2 = 0..4.
INSERT INTO t1 (id1, id2) VALUES
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4);

# t2 holds two rows, so the subquery on id4 without a filter yields {0, 1},
# while the subquery filtered on id4 = 3 yields no rows.
INSERT INTO t2 (id3, id4) VALUES
(0, 0),
(1, 1);

# Aggregation query with multiple ANDed predicates, one of which is an IN subquery.
# With the rows above, only id1 = 0 matches, so the expected count is 1.
SELECT count(*) FROM t1 WHERE id1 = 0 AND id1 IN (SELECT id4 FROM t2);
# Aggregation query with a complex (OR) expression that contains an IN subquery.
# With the rows above, id1 in {0, 1, 2} matches, so the expected count is 3.
SELECT count(*) FROM t1 WHERE id1 = 2 OR id1 IN (SELECT id4 FROM t2);
# Aggregation query with multiple ANDed predicates, one of which is an IN subquery that returns no rows.
# IN over an empty result is never true, so the expected count is 0.
SELECT count(*) FROM t1 WHERE id1 = 0 AND id1 IN (SELECT id4 FROM t2 where id4 = 3);
# Aggregation query with a complex (OR) expression that contains an IN subquery that returns no rows.
# Only the id1 = 2 side can match, so the expected count is 1.
SELECT count(*) FROM t1 WHERE id1 = 2 OR id1 IN (SELECT id4 FROM t2 where id4 = 3);

# Aggregation query with multiple ANDed predicates, one of which is a NOT IN subquery.
# With the rows above, only id1 = 2 matches, so the expected count is 1.
SELECT count(*) FROM t1 WHERE id1 = 2 AND id1 NOT IN (SELECT id4 FROM t2);
# Aggregation query with a complex (OR) expression that contains a NOT IN subquery.
# With the rows above, id1 in {0, 2, 3, 4} matches, so the expected count is 4.
SELECT count(*) FROM t1 WHERE id1 = 0 OR id1 NOT IN (SELECT id4 FROM t2);
# Aggregation query with multiple ANDed predicates, one of which is a NOT IN subquery that returns no rows.
# NOT IN over an empty result is always true, so only id1 = 2 decides and the expected count is 1.
SELECT count(*) FROM t1 WHERE id1 = 2 AND id1 NOT IN (SELECT id4 FROM t2 where id4 = 3);
# Aggregation query with a complex (OR) expression that contains a NOT IN subquery that returns no rows.
# NOT IN over an empty result is always true, so every row matches and the expected count is 5.
SELECT count(*) FROM t1 WHERE id1 = 0 OR id1 NOT IN (SELECT id4 FROM t2 where id4 = 3);
33 changes: 27 additions & 6 deletions go/vt/vtgate/planbuilder/operators/subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@ func (sq *SubQuery) GetMergePredicates() []sqlparser.Expr {
}

func (sq *SubQuery) settle(ctx *plancontext.PlanningContext, outer Operator) Operator {
if !sq.TopLevel {
// Uncorrelated subqueries are allowed even when the subquery isn't the top level construct,
// e.g. when it's underneath an Aggregator, because they will be pulled out and run separately.
if !sq.TopLevel && sq.correlated {
frouioui marked this conversation as resolved.
Show resolved Hide resolved
panic(subqueryNotAtTopErr)
}
if sq.correlated && sq.FilterType != opcode.PulloutExists {
Expand Down Expand Up @@ -253,6 +255,20 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
}
post := func(cursor *sqlparser.CopyOnWriteCursor) {
node := cursor.Node()
// For IN and NOT IN type filters, we have to add an expression that checks whether we got any rows back,
// for correctness. For IN, that check is ANDed with the comparison; for NOT IN, its negation is ORed with the comparison.
if compExpr, isCompExpr := node.(*sqlparser.ComparisonExpr); sq.FilterType.NeedsListArg() && isCompExpr {
if listArg, isListArg := compExpr.Right.(sqlparser.ListArg); isListArg && listArg.String() == sq.ArgName {
frouioui marked this conversation as resolved.
Show resolved Hide resolved
if sq.FilterType == opcode.PulloutIn {
cursor.Replace(sqlparser.AndExpressions(sqlparser.NewArgument(hasValuesArg()), compExpr))
} else {
cursor.Replace(&sqlparser.OrExpr{
Left: sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())),
Right: compExpr,
})
}
}
}
if _, ok := node.(*sqlparser.Subquery); !ok {
return
}
Expand All @@ -277,13 +293,18 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
sq.FilterType = opcode.PulloutExists // it's the same pullout as EXISTS, just with a NOT in front of the predicate
predicates = append(predicates, sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())))
case opcode.PulloutIn:
predicates = append(predicates, sqlparser.NewArgument(hasValuesArg()), rhsPred)
// Because we replace the comparison expression with an AND expression, that AND might be the top level construct of rhsPred.
// In that case, it is better to send the two sides of the AND expression as separate predicates, because that can
// lead to better routing. The AND is not always at the top level, though: for example, rhsPred can be something like
// `user.id = 2 OR (:__sq_has_values AND user.id IN ::sql1)`, in which case it is added as a single predicate.
if andExpr, isAndExpr := rhsPred.(*sqlparser.AndExpr); isAndExpr {
predicates = append(predicates, andExpr.Left, andExpr.Right)
} else {
predicates = append(predicates, rhsPred)
}
sq.SubqueryValueName = sq.ArgName
case opcode.PulloutNotIn:
predicates = append(predicates, &sqlparser.OrExpr{
Left: sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())),
Right: rhsPred,
})
predicates = append(predicates, rhsPred)
sq.SubqueryValueName = sq.ArgName
case opcode.PulloutValue:
predicates = append(predicates, rhsPred)
Expand Down
106 changes: 106 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/select_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -1883,6 +1883,112 @@
]
}
},
{
"comment": "Complex expression in a subquery used in IN clause of an aggregate query",
"query": "select count(*) from user where user.id = 2 or user.id in (select id from unsharded_a where colb = 2)",
"plan": {
"QueryType": "SELECT",
"Original": "select count(*) from user where user.id = 2 or user.id in (select id from unsharded_a where colb = 2)",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "sum_count_star(0) AS count(*)",
"Inputs": [
{
"OperatorType": "UncorrelatedSubquery",
"Variant": "PulloutIn",
"PulloutVars": [
"__sq_has_values",
"__sq1"
],
"Inputs": [
{
"InputName": "SubQuery",
"OperatorType": "Route",
"Variant": "Unsharded",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select id from unsharded_a where 1 != 1",
"Query": "select id from unsharded_a where colb = 2",
"Table": "unsharded_a"
},
{
"InputName": "Outer",
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select count(*) from `user` where 1 != 1",
"Query": "select count(*) from `user` where `user`.id = 2 or :__sq_has_values and `user`.id in ::__sq1",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"main.unsharded_a",
"user.user"
]
}
},
{
"comment": "Complex expression in a subquery used in NOT IN clause of an aggregate query",
"query": "select count(*) from user where user.id = 2 or user.id not in (select id from unsharded_a where colb = 2)",
"plan": {
"QueryType": "SELECT",
"Original": "select count(*) from user where user.id = 2 or user.id not in (select id from unsharded_a where colb = 2)",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "sum_count_star(0) AS count(*)",
"Inputs": [
{
"OperatorType": "UncorrelatedSubquery",
"Variant": "PulloutNotIn",
"PulloutVars": [
"__sq_has_values",
"__sq1"
],
"Inputs": [
{
"InputName": "SubQuery",
"OperatorType": "Route",
"Variant": "Unsharded",
"Keyspace": {
"Name": "main",
"Sharded": false
},
"FieldQuery": "select id from unsharded_a where 1 != 1",
"Query": "select id from unsharded_a where colb = 2",
"Table": "unsharded_a"
},
{
"InputName": "Outer",
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select count(*) from `user` where 1 != 1",
"Query": "select count(*) from `user` where `user`.id = 2 or (not :__sq_has_values or `user`.id not in ::__sq1)",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"main.unsharded_a",
"user.user"
]
}
},
{
"comment": "testing SingleRow Projection with arithmetics",
"query": "select 42+2",
Expand Down
Loading