Skip to content

Commit 3ab9495

Browse files
use aggregation engine over distinct engine when overlapping order by used
Signed-off-by: Harshit Gangal <harshit@planetscale.com>
1 parent 5af661e commit 3ab9495

File tree

3 files changed

+159
-22
lines changed

3 files changed

+159
-22
lines changed

go/vt/vtgate/planbuilder/operators/queryprojection.go

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy
387387

388388
func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
389389
if qp.Distinct && !qp.HasAggr {
390-
// grouping and distinct both lead to unique results, so we don't need
391-
qp.groupByExprs = nil
390+
if qp.useGroupingOverDistinct(ctx) {
391+
// if the order by overlaps with the select expressions, we can use ordered aggregation instead of distinct.
392+
qp.Distinct = false
393+
} else {
394+
// grouping and distinct both lead to unique results, so we don't need the grouping expressions
395+
qp.groupByExprs = nil
396+
}
392397
}
393398

394399
if qp.HasAggr && len(qp.groupByExprs) == 0 {
@@ -850,6 +855,45 @@ func (qp *QueryProjection) GetColumnCount() int {
850855
return len(qp.SelectExprs) - qp.AddedColumn
851856
}
852857

858+
func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.PlanningContext) bool {
859+
for _, expr := range qp.OrderExprs {
860+
idx, _ := qp.FindSelectExprIndexForExpr(ctx, expr.SimplifiedExpr)
861+
if idx != nil {
862+
return true
863+
}
864+
}
865+
return false
866+
}
867+
868+
func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool {
869+
if !qp.orderByOverlapWithSelectExpr(ctx) {
870+
return false
871+
}
872+
var gbs []GroupBy
873+
for idx, selExpr := range qp.SelectExprs {
874+
ae, err := selExpr.GetAliasedExpr()
875+
if err != nil {
876+
// not an alias Expr, cannot continue forward.
877+
return false
878+
}
879+
sExpr := qp.GetSimplifiedExpr(ae.Expr)
880+
// check if the grouping already exists on that column.
881+
found := slices.IndexFunc(qp.groupByExprs, func(gb GroupBy) bool {
882+
return ctx.SemTable.EqualsExprWithDeps(gb.SimplifiedExpr, sExpr)
883+
})
884+
if found != -1 {
885+
continue
886+
}
887+
groupBy := NewGroupBy(ae.Expr, sExpr, ae)
888+
selectExprIdx := idx
889+
groupBy.InnerIndex = &selectExprIdx
890+
891+
gbs = append(gbs, groupBy)
892+
}
893+
qp.groupByExprs = append(qp.groupByExprs, gbs...)
894+
return true
895+
}
896+
853897
func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
854898
return sqlparser.Walk(func(node sqlparser.SQLNode) (bool, error) {
855899
if _, isAggregate := node.(sqlparser.AggrFunc); isAggregate {

go/vt/vtgate/planbuilder/testdata/oltp_cases.json

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -106,28 +106,21 @@
106106
"QueryType": "SELECT",
107107
"Original": "SELECT DISTINCT c FROM sbtest30 WHERE id BETWEEN 1 AND 10 ORDER BY c",
108108
"Instructions": {
109-
"OperatorType": "Sort",
110-
"Variant": "Memory",
111-
"OrderBy": "0 ASC COLLATE latin1_swedish_ci",
109+
"OperatorType": "Aggregate",
110+
"Variant": "Ordered",
111+
"GroupBy": "0 COLLATE latin1_swedish_ci",
112112
"Inputs": [
113113
{
114-
"OperatorType": "Distinct",
115-
"Collations": [
116-
"0: latin1_swedish_ci"
117-
],
118-
"Inputs": [
119-
{
120-
"OperatorType": "Route",
121-
"Variant": "Scatter",
122-
"Keyspace": {
123-
"Name": "main",
124-
"Sharded": true
125-
},
126-
"FieldQuery": "select c from sbtest30 where 1 != 1",
127-
"Query": "select distinct c from sbtest30 where id between 1 and 10",
128-
"Table": "sbtest30"
129-
}
130-
]
114+
"OperatorType": "Route",
115+
"Variant": "Scatter",
116+
"Keyspace": {
117+
"Name": "main",
118+
"Sharded": true
119+
},
120+
"FieldQuery": "select c from sbtest30 where 1 != 1 group by c",
121+
"OrderBy": "0 ASC COLLATE latin1_swedish_ci",
122+
"Query": "select c from sbtest30 where id between 1 and 10 group by c order by c asc",
123+
"Table": "sbtest30"
131124
}
132125
]
133126
},

go/vt/vtgate/planbuilder/testdata/postprocess_cases.json

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,5 +2081,105 @@
20812081
"user.user"
20822082
]
20832083
}
2084+
},
2085+
{
2086+
"comment": "distinct with order by using aggregation engine",
2087+
"query": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc",
2088+
"plan": {
2089+
"QueryType": "SELECT",
2090+
"Original": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc",
2091+
"Instructions": {
2092+
"OperatorType": "Aggregate",
2093+
"Variant": "Ordered",
2094+
"GroupBy": "0",
2095+
"Inputs": [
2096+
{
2097+
"OperatorType": "Route",
2098+
"Variant": "Scatter",
2099+
"Keyspace": {
2100+
"Name": "user",
2101+
"Sharded": true
2102+
},
2103+
"FieldQuery": "select col from `user` where 1 != 1 group by col",
2104+
"OrderBy": "0 ASC",
2105+
"Query": "select col from `user` where id between :vtg1 and :vtg2 group by col order by col asc",
2106+
"Table": "`user`"
2107+
}
2108+
]
2109+
},
2110+
"TablesUsed": [
2111+
"user.user"
2112+
]
2113+
}
2114+
},
2115+
{
2116+
"comment": "distinct with order by having additional non-order by columns in the selection using aggregation engine",
2117+
"query": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc",
2118+
"plan": {
2119+
"QueryType": "SELECT",
2120+
"Original": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc",
2121+
"Instructions": {
2122+
"OperatorType": "Aggregate",
2123+
"Variant": "Ordered",
2124+
"GroupBy": "1, (0|2)",
2125+
"ResultColumns": 2,
2126+
"Inputs": [
2127+
{
2128+
"OperatorType": "Route",
2129+
"Variant": "Scatter",
2130+
"Keyspace": {
2131+
"Name": "user",
2132+
"Sharded": true
2133+
},
2134+
"FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1 group by col, foo, weight_string(foo)",
2135+
"OrderBy": "1 ASC, (0|2) ASC",
2136+
"Query": "select foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2 group by col, foo, weight_string(foo) order by col asc, foo asc",
2137+
"Table": "`user`"
2138+
}
2139+
]
2140+
},
2141+
"TablesUsed": [
2142+
"user.user"
2143+
]
2144+
}
2145+
},
2146+
{
2147+
"comment": "distinct with order by having no overlap with the selection columns - using distinct engine",
2148+
"query": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc",
2149+
"plan": {
2150+
"QueryType": "SELECT",
2151+
"Original": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc",
2152+
"Instructions": {
2153+
"OperatorType": "Sort",
2154+
"Variant": "Memory",
2155+
"OrderBy": "1 ASC",
2156+
"ResultColumns": 1,
2157+
"Inputs": [
2158+
{
2159+
"OperatorType": "Distinct",
2160+
"Collations": [
2161+
"(0:2)",
2162+
"1"
2163+
],
2164+
"Inputs": [
2165+
{
2166+
"OperatorType": "Route",
2167+
"Variant": "Scatter",
2168+
"Keyspace": {
2169+
"Name": "user",
2170+
"Sharded": true
2171+
},
2172+
"FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1",
2173+
"Query": "select distinct foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2",
2174+
"Table": "`user`"
2175+
}
2176+
]
2177+
}
2178+
]
2179+
},
2180+
"TablesUsed": [
2181+
"user.user"
2182+
]
2183+
}
20842184
}
20852185
]

0 commit comments

Comments
 (0)