Skip to content

Commit

Permalink
Use hash joins when nested loop joins are not feasible (vitessio#14448)
Browse files Browse the repository at this point in the history
  • Loading branch information
systay authored Nov 16, 2023
1 parent fe14d97 commit 09715e3
Show file tree
Hide file tree
Showing 55 changed files with 1,391 additions and 1,004 deletions.
4 changes: 4 additions & 0 deletions go/sqltypes/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ func IsText(t querypb.Type) bool {
return int(t)&flagIsText == flagIsText
}

// IsTextOrBinary returns true if querypb.Type has either the text flag
// or the binary flag set.
func IsTextOrBinary(t querypb.Type) bool {
	flags := int(t)
	if flags&flagIsText == flagIsText {
		return true
	}
	return flags&flagIsBinary == flagIsBinary
}

// IsBinary returns true if querypb.Type is a binary.
// If you have a Value object, use its member function.
func IsBinary(t querypb.Type) bool {
Expand Down
8 changes: 4 additions & 4 deletions go/test/endtoend/vtgate/queries/aggregation/distinct_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ func TestDistinctIt(t *testing.T) {
mcmp.AssertMatchesNoOrder("select distinct id from aggr_test", `[[INT64(1)] [INT64(2)] [INT64(3)] [INT64(5)] [INT64(4)] [INT64(6)] [INT64(7)] [INT64(8)]]`)

if utils.BinaryIsAtLeastAtVersion(17, "vtgate") {
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ distinct val1 from aggr_test order by val1 desc", `[[VARCHAR("e")] [VARCHAR("d")] [VARCHAR("c")] [VARCHAR("b")] [VARCHAR("a")]]`)
mcmp.AssertMatchesNoOrder("select /*vt+ PLANNER=Gen4 */ distinct val1, count(*) from aggr_test group by val1", `[[VARCHAR("a") INT64(2)] [VARCHAR("b") INT64(1)] [VARCHAR("c") INT64(2)] [VARCHAR("d") INT64(1)] [VARCHAR("e") INT64(2)]]`)
mcmp.AssertMatchesNoOrder("select /*vt+ PLANNER=Gen4 */ distinct val1+val2 from aggr_test", `[[NULL] [FLOAT64(1)] [FLOAT64(3)] [FLOAT64(4)]]`)
mcmp.AssertMatchesNoOrder("select /*vt+ PLANNER=Gen4 */ distinct count(*) from aggr_test group by val1", `[[INT64(2)] [INT64(1)]]`)
mcmp.AssertMatches("select distinct val1 from aggr_test order by val1 desc", `[[VARCHAR("e")] [VARCHAR("d")] [VARCHAR("c")] [VARCHAR("b")] [VARCHAR("a")]]`)
mcmp.AssertMatchesNoOrder("select distinct val1, count(*) from aggr_test group by val1", `[[VARCHAR("a") INT64(2)] [VARCHAR("b") INT64(1)] [VARCHAR("c") INT64(2)] [VARCHAR("d") INT64(1)] [VARCHAR("e") INT64(2)]]`)
mcmp.AssertMatchesNoOrder("select distinct val1+val2 from aggr_test", `[[NULL] [FLOAT64(1)] [FLOAT64(3)] [FLOAT64(4)]]`)
mcmp.AssertMatchesNoOrder("select distinct count(*) from aggr_test group by val1", `[[INT64(2)] [INT64(1)]]`)
}
}
35 changes: 30 additions & 5 deletions go/test/endtoend/vtgate/queries/derived/derived_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,22 @@ func TestDerivedTableWithOrderByLimit(t *testing.T) {
mcmp, closer := start(t)
defer closer()

mcmp.Exec("select /*vt+ PLANNER=Gen4 */ music.id from music join (select id,name from user order by id limit 2) as d on music.user_id = d.id")
mcmp.Exec("select music.id from music join (select id,name from user order by id limit 2) as d on music.user_id = d.id")
}

func TestDerivedAggregationOnRHS(t *testing.T) {
mcmp, closer := start(t)
defer closer()

mcmp.Exec("set sql_mode = ''")
mcmp.Exec("select /*vt+ PLANNER=Gen4 */ d.a from music join (select id, count(*) as a from user) as d on music.user_id = d.id group by 1")
mcmp.Exec("select d.a from music join (select id, count(*) as a from user) as d on music.user_id = d.id group by 1")
}

func TestDerivedRemoveInnerOrderBy(t *testing.T) {
mcmp, closer := start(t)
defer closer()

mcmp.Exec("select /*vt+ PLANNER=Gen4 */ count(*) from (select user.id as oui, music.id as non from user join music on user.id = music.user_id order by user.name) as toto")
mcmp.Exec("select count(*) from (select user.id as oui, music.id as non from user join music on user.id = music.user_id order by user.name) as toto")
}

func TestDerivedTableWithHaving(t *testing.T) {
Expand All @@ -76,14 +76,39 @@ func TestDerivedTableWithHaving(t *testing.T) {

mcmp.Exec("set sql_mode = ''")
// For the given query, we can get any id back, because we aren't grouping by it.
mcmp.AssertMatchesAnyNoCompare("select /*vt+ PLANNER=Gen4 */ * from (select id from user having count(*) >= 1) s",
mcmp.AssertMatchesAnyNoCompare("select * from (select id from user having count(*) >= 1) s",
"[[INT64(1)]]", "[[INT64(2)]]", "[[INT64(3)]]", "[[INT64(4)]]", "[[INT64(5)]]")
}

func TestDerivedTableColumns(t *testing.T) {
mcmp, closer := start(t)
defer closer()

mcmp.AssertMatches(`SELECT /*vt+ PLANNER=gen4 */ t.id FROM (SELECT id FROM user) AS t(id) ORDER BY t.id DESC`,
mcmp.AssertMatches(`SELECT t.id FROM (SELECT id FROM user) AS t(id) ORDER BY t.id DESC`,
`[[INT64(5)] [INT64(4)] [INT64(3)] [INT64(2)] [INT64(1)]]`)
}

// TestDerivedTablesWithLimit tests queries where we have to limit the right hand side of the join.
// We do this by not using the apply join we usually use, and instead use the hash join engine primitive.
// The assertions below exercise these situations: a join between two LIMITed derived tables,
// and a LEFT JOIN whose right hand side is a LIMITed derived table.
func TestDerivedTablesWithLimit(t *testing.T) {
// We need full type info before planning this, so we wait for the schema tracker
require.NoError(t,
utils.WaitForAuthoritative(t, keyspaceName, "user", clusterInstance.VtgateProcess.ReadVSchema))

mcmp, closer := start(t)
defer closer()

// Add a user that, per the expected rows below, has no matching music row;
// it only shows up in the LEFT JOIN result, paired with NULL.
mcmp.Exec("insert into user(id, name) values(6,'pikachu')")

// Join between two derived tables that each carry a LIMIT.
// NOTE(review): neither LIMIT has an ORDER BY; the fixed expected rows presumably rely on
// both tables holding fewer than 10 rows - confirm against the fixture data.
mcmp.AssertMatchesNoOrder(
`SELECT u.id, m.id FROM
(SELECT id, name FROM user LIMIT 10) AS u JOIN
(SELECT id, user_id FROM music LIMIT 10) as m on u.id = m.user_id`,
`[[INT64(1) INT64(1)] [INT64(5) INT64(2)] [INT64(1) INT64(3)] [INT64(2) INT64(4)] [INT64(3) INT64(5)] [INT64(5) INT64(7)] [INT64(4) INT64(6)]]`)

// LEFT JOIN with the LIMITed derived table on the right hand side: same rows as above,
// plus user 6 paired with NULL.
mcmp.AssertMatchesNoOrder(
`SELECT u.id, m.id FROM user AS u LEFT JOIN
(SELECT id, user_id FROM music LIMIT 10) as m on u.id = m.user_id`,
`[[INT64(1) INT64(1)] [INT64(5) INT64(2)] [INT64(1) INT64(3)] [INT64(2) INT64(4)] [INT64(3) INT64(5)] [INT64(5) INT64(7)] [INT64(4) INT64(6)] [INT64(6) NULL]]`)
}
4 changes: 2 additions & 2 deletions go/test/endtoend/vtgate/queries/derived/schema.sql
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
create table user
(
id bigint,
id bigint,
name varchar(255),
primary key (id)
) Engine = InnoDB;

create table music
(
id bigint,
id bigint,
user_id bigint,
primary key (id)
) Engine = InnoDB;
4 changes: 2 additions & 2 deletions go/test/endtoend/vtgate/queries/orderby/orderby_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func TestOrderBy(t *testing.T) {
mcmp.AssertMatches("select id1, id2 from t4 order by id1 desc", `[[INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(5) VARCHAR("test")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
// test ordering of complex column
if utils.BinaryIsAtLeastAtVersion(17, "vtgate") {
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ id1, id2 from t4 order by reverse(id2) desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(2) VARCHAR("Abc")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(1) VARCHAR("a")]]`)
mcmp.AssertMatches("select id1, id2 from t4 order by reverse(id2) desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(2) VARCHAR("Abc")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(1) VARCHAR("a")]]`)
}

defer func() {
Expand All @@ -80,6 +80,6 @@ func TestOrderBy(t *testing.T) {
mcmp.AssertMatches("select id1, id2 from t4 order by id2 desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
mcmp.AssertMatches("select id1, id2 from t4 order by id1 desc", `[[INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(5) VARCHAR("test")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
if utils.BinaryIsAtLeastAtVersion(17, "vtgate") {
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ id1, id2 from t4 order by reverse(id2) desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(2) VARCHAR("Abc")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(1) VARCHAR("a")]]`)
mcmp.AssertMatches("select id1, id2 from t4 order by reverse(id2) desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(2) VARCHAR("Abc")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(1) VARCHAR("a")]]`)
}
}
32 changes: 16 additions & 16 deletions go/test/endtoend/vtgate/queries/random/simplifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,22 @@ func TestSimplifyResultsMismatchedQuery(t *testing.T) {
t.Skip("Skip CI")

var queries []string
queries = append(queries, "select /*vt+ PLANNER=Gen4 */ (68 - -16) / case false when -45 then 3 when 28 then -43 else -62 end as crandom0 from dept as tbl0, (select /*vt+ PLANNER=Gen4 */ distinct not not false and count(*) from emp as tbl0, emp as tbl1 where tbl1.ename) as tbl1 limit 1",
"select /*vt+ PLANNER=Gen4 */ distinct case true when 'burro' then 'trout' else 'elf' end < case count(distinct true) when 'bobcat' then 'turkey' else 'penguin' end from dept as tbl0, emp as tbl1 where 'spider'",
"select /*vt+ PLANNER=Gen4 */ distinct sum(distinct tbl1.deptno) from dept as tbl0, emp as tbl1 where tbl0.deptno and tbl1.comm in (12, tbl0.deptno, case false when 67 then -17 when -78 then -35 end, -76 >> -68)",
"select /*vt+ PLANNER=Gen4 */ count(*) + 1 from emp as tbl0 order by count(*) desc",
"select /*vt+ PLANNER=Gen4 */ count(2 >> tbl2.mgr), sum(distinct tbl2.empno <=> 15) from emp as tbl0 left join emp as tbl2 on -32",
"select /*vt+ PLANNER=Gen4 */ sum(case false when true then tbl1.deptno else -154 / 132 end) as caggr1 from emp as tbl0, dept as tbl1",
"select /*vt+ PLANNER=Gen4 */ tbl1.dname as cgroup0, tbl1.dname as cgroup1 from dept as tbl0, dept as tbl1 group by tbl1.dname, tbl1.deptno order by tbl1.deptno desc",
"select /*vt+ PLANNER=Gen4 */ tbl0.ename as cgroup1 from emp as tbl0 group by tbl0.job, tbl0.ename having sum(tbl0.mgr) = sum(tbl0.mgr) order by tbl0.job desc, tbl0.ename asc limit 8",
"select /*vt+ PLANNER=Gen4 */ distinct count(*) as caggr1 from dept as tbl0, emp as tbl1 group by tbl1.sal having max(tbl1.comm) != true",
"select /*vt+ PLANNER=Gen4 */ distinct sum(tbl1.loc) as caggr0 from dept as tbl0, dept as tbl1 group by tbl1.deptno having max(tbl1.dname) <= 1",
"select /*vt+ PLANNER=Gen4 */ min(tbl0.deptno) as caggr0 from dept as tbl0, emp as tbl1 where case when false then tbl0.dname end group by tbl1.comm",
"select /*vt+ PLANNER=Gen4 */ count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 1 = 0",
"select /*vt+ PLANNER=Gen4 */ count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 'octopus'",
"select /*vt+ PLANNER=Gen4 */ distinct 'octopus' as crandom0 from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.empno having count(*) = count(*)",
"select /*vt+ PLANNER=Gen4 */ max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno",
"select /*vt+ PLANNER=Gen4 */ count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal")
queries = append(queries, "select (68 - -16) / case false when -45 then 3 when 28 then -43 else -62 end as crandom0 from dept as tbl0, (select distinct not not false and count(*) from emp as tbl0, emp as tbl1 where tbl1.ename) as tbl1 limit 1",
"select distinct case true when 'burro' then 'trout' else 'elf' end < case count(distinct true) when 'bobcat' then 'turkey' else 'penguin' end from dept as tbl0, emp as tbl1 where 'spider'",
"select distinct sum(distinct tbl1.deptno) from dept as tbl0, emp as tbl1 where tbl0.deptno and tbl1.comm in (12, tbl0.deptno, case false when 67 then -17 when -78 then -35 end, -76 >> -68)",
"select count(*) + 1 from emp as tbl0 order by count(*) desc",
"select count(2 >> tbl2.mgr), sum(distinct tbl2.empno <=> 15) from emp as tbl0 left join emp as tbl2 on -32",
"select sum(case false when true then tbl1.deptno else -154 / 132 end) as caggr1 from emp as tbl0, dept as tbl1",
"select tbl1.dname as cgroup0, tbl1.dname as cgroup1 from dept as tbl0, dept as tbl1 group by tbl1.dname, tbl1.deptno order by tbl1.deptno desc",
"select tbl0.ename as cgroup1 from emp as tbl0 group by tbl0.job, tbl0.ename having sum(tbl0.mgr) = sum(tbl0.mgr) order by tbl0.job desc, tbl0.ename asc limit 8",
"select distinct count(*) as caggr1 from dept as tbl0, emp as tbl1 group by tbl1.sal having max(tbl1.comm) != true",
"select distinct sum(tbl1.loc) as caggr0 from dept as tbl0, dept as tbl1 group by tbl1.deptno having max(tbl1.dname) <= 1",
"select min(tbl0.deptno) as caggr0 from dept as tbl0, emp as tbl1 where case when false then tbl0.dname end group by tbl1.comm",
"select count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 1 = 0",
"select count(*) as caggr0, 1 as crandom0 from dept as tbl0, emp as tbl1 where 'octopus'",
"select distinct 'octopus' as crandom0 from dept as tbl0, emp as tbl1 where tbl0.deptno = tbl1.empno having count(*) = count(*)",
"select max(tbl0.deptno) from dept as tbl0 right join emp as tbl1 on tbl0.deptno = tbl1.empno and tbl0.deptno = tbl1.deptno group by tbl0.deptno",
"select count(tbl1.comm) from emp as tbl1 right join emp as tbl2 on tbl1.mgr = tbl2.sal")

for _, query := range queries {
var simplified string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func TestReferenceRouting(t *testing.T) {
utils.AssertMatches(
t,
conn,
`SELECT /*vt+ PLANNER=gen4 */ COUNT(zd.id)
`SELECT COUNT(zd.id)
FROM delivery_failure df
JOIN zip_detail zd ON zd.id = df.zip_detail_id WHERE zd.id = 3`,
`[[INT64(0)]]`,
Expand Down
2 changes: 1 addition & 1 deletion go/test/endtoend/vtgate/queries/subquery/subquery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func TestSubqueryInUpdate(t *testing.T) {
utils.Exec(t, conn, `insert into t1(id1, id2) values (1, 10), (2, 20), (3, 30), (4, 40), (5, 50)`)
utils.Exec(t, conn, `insert into t2(id3, id4) values (1, 3), (2, 4)`)
utils.AssertMatches(t, conn, `SELECT id2, keyspace_id FROM t1_id2_idx WHERE id2 IN (2,10)`, `[[INT64(10) VARBINARY("\x16k@\xb4J\xbaK\xd6")]]`)
utils.Exec(t, conn, `update /*vt+ PLANNER=gen4 */ t1 set id2 = (select count(*) from t2) where id1 = 1`)
utils.Exec(t, conn, `update t1 set id2 = (select count(*) from t2) where id1 = 1`)
utils.AssertMatches(t, conn, `SELECT id2 FROM t1 WHERE id1 = 1`, `[[INT64(2)]]`)
utils.AssertMatches(t, conn, `SELECT id2, keyspace_id FROM t1_id2_idx WHERE id2 IN (2,10)`, `[[INT64(2) VARBINARY("\x16k@\xb4J\xbaK\xd6")]]`)
}
Expand Down
30 changes: 27 additions & 3 deletions go/vt/vtgate/engine/fake_vcursor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ import (
"context"
"fmt"
"reflect"
"slices"
"sort"
"strings"
"sync"
"testing"
"time"

"github.com/google/go-cmp/cmp"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/test/utils"
Expand Down Expand Up @@ -806,18 +809,39 @@ func (t *noopVCursor) GetLogs() ([]ExecuteEntry, error) {
return nil, nil
}

func expectResult(t *testing.T, msg string, result, want *sqltypes.Result) {
func expectResult(t *testing.T, result, want *sqltypes.Result) {
t.Helper()
fieldsResult := fmt.Sprintf("%v", result.Fields)
fieldsWant := fmt.Sprintf("%v", want.Fields)
if fieldsResult != fieldsWant {
t.Errorf("%s (mismatch in Fields):\n%s\nwant:\n%s", msg, fieldsResult, fieldsWant)
t.Errorf("mismatch in Fields\n%s\nwant:\n%s", fieldsResult, fieldsWant)
}

rowsResult := fmt.Sprintf("%v", result.Rows)
rowsWant := fmt.Sprintf("%v", want.Rows)
if rowsResult != rowsWant {
t.Errorf("%s (mismatch in Rows):\n%s\nwant:\n%s", msg, rowsResult, rowsWant)
t.Errorf("mismatch in Rows:\n%s\nwant:\n%s", rowsResult, rowsWant)
}
}

// expectResultAnyOrder reports a test error if result and want differ once
// row order is ignored.
//
// Both results have their Rows slices sorted in place before being compared,
// so callers must not rely on the original row order afterwards. Rows are
// ordered lexicographically by the raw string form of each column.
func expectResultAnyOrder(t *testing.T, result, want *sqltypes.Result) {
	t.Helper()
	compareRows := func(a, b sqltypes.Row) int {
		// Only walk the columns both rows have: indexing b over the full
		// range of a would panic when the rows differ in column count.
		for i := 0; i < len(a) && i < len(b); i++ {
			if c := strings.Compare(a[i].RawStr(), b[i].RawStr()); c != 0 {
				return c
			}
		}
		// Rows that agree on their shared prefix are ordered by length, which
		// keeps the comparator a consistent ordering and lets a column-count
		// mismatch surface as a diff below rather than as a panic.
		return len(a) - len(b)
	}
	slices.SortFunc(result.Rows, compareRows)
	slices.SortFunc(want.Rows, compareRows)
	if diff := cmp.Diff(want, result); diff != "" {
		t.Errorf("result: %+v, want %+v\ndiff: %s", result, want, diff)
	}
}

Expand Down
Loading

0 comments on commit 09715e3

Please sign in to comment.