Skip to content

Commit 2e847cd

Browse files
Complex Expression handling for uncorrelated IN and NOT IN subqueries (#16439)
Signed-off-by: Manan Gupta <manan@planetscale.com>
1 parent 00cba23 commit 2e847cd

File tree

3 files changed

+175
-6
lines changed

3 files changed

+175
-6
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
create table t1
2+
(
3+
id1 bigint,
4+
id2 bigint,
5+
primary key (id1)
6+
) Engine = InnoDB;
7+
8+
create table t2
9+
(
10+
id3 bigint,
11+
id4 bigint,
12+
primary key (id3)
13+
) Engine = InnoDB;
14+
15+
INSERT INTO t1 (id1, id2) VALUES
16+
(0, 0),
17+
(1, 1),
18+
(2, 2),
19+
(3, 3),
20+
(4, 4);
21+
22+
INSERT INTO t2 (id3, id4) VALUES
23+
(0, 0),
24+
(1, 1);
25+
26+
# Aggregation query with multiple expressions one of which is an IN subquery.
27+
SELECT count(*) FROM t1 WHERE id1 = 0 AND id1 IN (SELECT id4 FROM t2);
28+
# Aggregation query with a complex expression that has an IN subquery.
29+
SELECT count(*) FROM t1 WHERE id1 = 2 OR id1 IN (SELECT id4 FROM t2);
30+
# Aggregation query with multiple expressions one of which is an IN subquery that returns empty results.
31+
SELECT count(*) FROM t1 WHERE id1 = 0 AND id1 IN (SELECT id4 FROM t2 where id4 = 3);
32+
# Aggregation query with a complex expression that has an IN subquery that returns empty results.
33+
SELECT count(*) FROM t1 WHERE id1 = 2 OR id1 IN (SELECT id4 FROM t2 where id4 = 3);
34+
35+
# Aggregation query with multiple expressions, one of which is a NOT IN subquery.
36+
SELECT count(*) FROM t1 WHERE id1 = 2 AND id1 NOT IN (SELECT id4 FROM t2);
37+
# Aggregation query with a complex expression that has a NOT IN subquery.
38+
SELECT count(*) FROM t1 WHERE id1 = 0 OR id1 NOT IN (SELECT id4 FROM t2);
39+
# Aggregation query with multiple expressions, one of which is a NOT IN subquery that returns empty results.
40+
SELECT count(*) FROM t1 WHERE id1 = 2 AND id1 NOT IN (SELECT id4 FROM t2 where id4 = 3);
41+
# Aggregation query with a complex expression that has a NOT IN subquery that returns empty results.
42+
SELECT count(*) FROM t1 WHERE id1 = 0 OR id1 NOT IN (SELECT id4 FROM t2 where id4 = 3);

go/vt/vtgate/planbuilder/operators/subquery.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,9 @@ func (sq *SubQuery) GetMergePredicates() []sqlparser.Expr {
208208
}
209209

210210
func (sq *SubQuery) settle(ctx *plancontext.PlanningContext, outer Operator) Operator {
211-
if !sq.TopLevel {
211+
// We can allow uncorrelated queries even when subquery isn't the top level construct,
212+
// like if it's underneath an Aggregator, because they will be pulled out and run separately.
213+
if !sq.TopLevel && sq.correlated {
212214
panic(subqueryNotAtTopErr)
213215
}
214216
if sq.correlated && sq.FilterType != opcode.PulloutExists {
@@ -253,6 +255,20 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
253255
}
254256
post := func(cursor *sqlparser.CopyOnWriteCursor) {
255257
node := cursor.Node()
258+
// For IN and NOT IN type filters, we have to add an expression that checks whether we got any rows back
259+
// for correctness. That expression should be ANDed with the expression that has the IN/NOT IN comparison.
260+
if compExpr, isCompExpr := node.(*sqlparser.ComparisonExpr); sq.FilterType.NeedsListArg() && isCompExpr {
261+
if listArg, isListArg := compExpr.Right.(sqlparser.ListArg); isListArg && listArg.String() == sq.ArgName {
262+
if sq.FilterType == opcode.PulloutIn {
263+
cursor.Replace(sqlparser.AndExpressions(sqlparser.NewArgument(hasValuesArg()), compExpr))
264+
} else {
265+
cursor.Replace(&sqlparser.OrExpr{
266+
Left: sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())),
267+
Right: compExpr,
268+
})
269+
}
270+
}
271+
}
256272
if _, ok := node.(*sqlparser.Subquery); !ok {
257273
return
258274
}
@@ -277,13 +293,18 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
277293
sq.FilterType = opcode.PulloutExists // it's the same pullout as EXISTS, just with a NOT in front of the predicate
278294
predicates = append(predicates, sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())))
279295
case opcode.PulloutIn:
280-
predicates = append(predicates, sqlparser.NewArgument(hasValuesArg()), rhsPred)
296+
// Because we replace the comparison expression with an AND expression, it might be the top level construct there.
297+
// In this case, it is better to send the two sides of the AND expression separately in the predicates because it can
298+
// lead to better routing. This, however, might not always be the case; for example, rhsPred can be something like
299+
// `user.id = 2 OR (:__sq_has_values AND user.id IN ::sql1)`
300+
if andExpr, isAndExpr := rhsPred.(*sqlparser.AndExpr); isAndExpr {
301+
predicates = append(predicates, andExpr.Left, andExpr.Right)
302+
} else {
303+
predicates = append(predicates, rhsPred)
304+
}
281305
sq.SubqueryValueName = sq.ArgName
282306
case opcode.PulloutNotIn:
283-
predicates = append(predicates, &sqlparser.OrExpr{
284-
Left: sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())),
285-
Right: rhsPred,
286-
})
307+
predicates = append(predicates, rhsPred)
287308
sq.SubqueryValueName = sq.ArgName
288309
case opcode.PulloutValue:
289310
predicates = append(predicates, rhsPred)

go/vt/vtgate/planbuilder/testdata/select_cases.json

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,6 +1883,112 @@
18831883
]
18841884
}
18851885
},
1886+
{
1887+
"comment": "Complex expression in a subquery used in IN clause of an aggregate query",
1888+
"query": "select count(*) from user where user.id = 2 or user.id in (select id from unsharded_a where colb = 2)",
1889+
"plan": {
1890+
"QueryType": "SELECT",
1891+
"Original": "select count(*) from user where user.id = 2 or user.id in (select id from unsharded_a where colb = 2)",
1892+
"Instructions": {
1893+
"OperatorType": "Aggregate",
1894+
"Variant": "Scalar",
1895+
"Aggregates": "sum_count_star(0) AS count(*)",
1896+
"Inputs": [
1897+
{
1898+
"OperatorType": "UncorrelatedSubquery",
1899+
"Variant": "PulloutIn",
1900+
"PulloutVars": [
1901+
"__sq_has_values",
1902+
"__sq1"
1903+
],
1904+
"Inputs": [
1905+
{
1906+
"InputName": "SubQuery",
1907+
"OperatorType": "Route",
1908+
"Variant": "Unsharded",
1909+
"Keyspace": {
1910+
"Name": "main",
1911+
"Sharded": false
1912+
},
1913+
"FieldQuery": "select id from unsharded_a where 1 != 1",
1914+
"Query": "select id from unsharded_a where colb = 2",
1915+
"Table": "unsharded_a"
1916+
},
1917+
{
1918+
"InputName": "Outer",
1919+
"OperatorType": "Route",
1920+
"Variant": "Scatter",
1921+
"Keyspace": {
1922+
"Name": "user",
1923+
"Sharded": true
1924+
},
1925+
"FieldQuery": "select count(*) from `user` where 1 != 1",
1926+
"Query": "select count(*) from `user` where `user`.id = 2 or :__sq_has_values and `user`.id in ::__sq1",
1927+
"Table": "`user`"
1928+
}
1929+
]
1930+
}
1931+
]
1932+
},
1933+
"TablesUsed": [
1934+
"main.unsharded_a",
1935+
"user.user"
1936+
]
1937+
}
1938+
},
1939+
{
1940+
"comment": "Complex expression in a subquery used in NOT IN clause of an aggregate query",
1941+
"query": "select count(*) from user where user.id = 2 or user.id not in (select id from unsharded_a where colb = 2)",
1942+
"plan": {
1943+
"QueryType": "SELECT",
1944+
"Original": "select count(*) from user where user.id = 2 or user.id not in (select id from unsharded_a where colb = 2)",
1945+
"Instructions": {
1946+
"OperatorType": "Aggregate",
1947+
"Variant": "Scalar",
1948+
"Aggregates": "sum_count_star(0) AS count(*)",
1949+
"Inputs": [
1950+
{
1951+
"OperatorType": "UncorrelatedSubquery",
1952+
"Variant": "PulloutNotIn",
1953+
"PulloutVars": [
1954+
"__sq_has_values",
1955+
"__sq1"
1956+
],
1957+
"Inputs": [
1958+
{
1959+
"InputName": "SubQuery",
1960+
"OperatorType": "Route",
1961+
"Variant": "Unsharded",
1962+
"Keyspace": {
1963+
"Name": "main",
1964+
"Sharded": false
1965+
},
1966+
"FieldQuery": "select id from unsharded_a where 1 != 1",
1967+
"Query": "select id from unsharded_a where colb = 2",
1968+
"Table": "unsharded_a"
1969+
},
1970+
{
1971+
"InputName": "Outer",
1972+
"OperatorType": "Route",
1973+
"Variant": "Scatter",
1974+
"Keyspace": {
1975+
"Name": "user",
1976+
"Sharded": true
1977+
},
1978+
"FieldQuery": "select count(*) from `user` where 1 != 1",
1979+
"Query": "select count(*) from `user` where `user`.id = 2 or (not :__sq_has_values or `user`.id not in ::__sq1)",
1980+
"Table": "`user`"
1981+
}
1982+
]
1983+
}
1984+
]
1985+
},
1986+
"TablesUsed": [
1987+
"main.unsharded_a",
1988+
"user.user"
1989+
]
1990+
}
1991+
},
18861992
{
18871993
"comment": "testing SingleRow Projection with arithmetics",
18881994
"query": "select 42+2",

0 commit comments

Comments
 (0)