Skip to content

Commit b7adb21

Browse files
[release-18.0] make sure to handle unsupported collations well (#15134) (#15142)
Signed-off-by: Andres Taylor <andres@planetscale.com> Co-authored-by: Andres Taylor <andres@planetscale.com>
1 parent 5ac69ec commit b7adb21

File tree

13 files changed

+124
-44
lines changed

13 files changed

+124
-44
lines changed

go/mysql/collations/env.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,3 +301,8 @@ func (env *Environment) LookupByCharset(name string) *colldefaults {
301301
func (env *Environment) LookupCharsetName(coll ID) string {
302302
return env.byCharsetName[coll]
303303
}
304+
305+
func (env *Environment) IsSupported(coll ID) bool {
306+
_, supported := env.byID[coll]
307+
return supported
308+
}

go/vt/vtgate/engine/aggregations.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ func NewAggregateParam(opcode AggregateOpcode, col int, alias string) *Aggregate
5858
Col: col,
5959
Alias: alias,
6060
WCol: -1,
61-
Type: sqltypes.Unknown,
6261
}
6362
if opcode.NeedsComparableValues() {
6463
out.KeyCol = col
@@ -75,7 +74,7 @@ func (ap *AggregateParams) String() string {
7574
if ap.WAssigned() {
7675
keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol)
7776
}
78-
if sqltypes.IsText(ap.Type) && ap.CollationID != collations.Unknown {
77+
if sqltypes.IsText(ap.Type) && collations.Local().IsSupported(ap.CollationID) {
7978
keyCol += " COLLATE " + collations.Local().LookupName(ap.CollationID)
8079
}
8180
dispOrigOp := ""

go/vt/vtgate/executor_select_test.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,28 +26,25 @@ import (
2626
"testing"
2727
"time"
2828

29-
_flag "vitess.io/vitess/go/internal/flag"
30-
"vitess.io/vitess/go/mysql/collations"
31-
"vitess.io/vitess/go/streamlog"
32-
"vitess.io/vitess/go/vt/vtgate/logstats"
33-
34-
"vitess.io/vitess/go/vt/sqlparser"
35-
3629
"github.com/google/go-cmp/cmp"
3730
"github.com/stretchr/testify/assert"
3831
"github.com/stretchr/testify/require"
3932

33+
_flag "vitess.io/vitess/go/internal/flag"
34+
"vitess.io/vitess/go/mysql/collations"
4035
"vitess.io/vitess/go/sqltypes"
36+
"vitess.io/vitess/go/streamlog"
4137
"vitess.io/vitess/go/test/utils"
4238
"vitess.io/vitess/go/vt/discovery"
43-
"vitess.io/vitess/go/vt/vterrors"
44-
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
45-
"vitess.io/vitess/go/vt/vttablet/sandboxconn"
46-
4739
querypb "vitess.io/vitess/go/vt/proto/query"
4840
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
4941
vtgatepb "vitess.io/vitess/go/vt/proto/vtgate"
5042
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
43+
"vitess.io/vitess/go/vt/sqlparser"
44+
"vitess.io/vitess/go/vt/vterrors"
45+
"vitess.io/vitess/go/vt/vtgate/logstats"
46+
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
47+
"vitess.io/vitess/go/vt/vttablet/sandboxconn"
5148
)
5249

5350
func TestSelectNext(t *testing.T) {

go/vt/vtgate/planbuilder/collations_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,10 @@ import (
2020
"fmt"
2121
"testing"
2222

23-
"vitess.io/vitess/go/test/vschemawrapper"
24-
2523
"github.com/stretchr/testify/require"
2624

2725
"vitess.io/vitess/go/mysql/collations"
26+
"vitess.io/vitess/go/test/vschemawrapper"
2827
"vitess.io/vitess/go/vt/vtgate/engine"
2928
)
3029

go/vt/vtgate/planbuilder/plan_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,12 @@ func loadSchema(t testing.TB, filename string, setCollation bool) *vindexes.VSch
501501
for _, table := range ks.Tables {
502502
for i, col := range table.Columns {
503503
if sqltypes.IsText(col.Type) {
504-
table.Columns[i].CollationName = "latin1_swedish_ci"
504+
switch col.Name.String() {
505+
case "textcol2":
506+
table.Columns[i].CollationName = "big5_bin"
507+
default:
508+
table.Columns[i].CollationName = "latin1_swedish_ci"
509+
}
505510
}
506511
}
507512
}

go/vt/vtgate/planbuilder/testdata/aggr_cases.json

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4845,7 +4845,8 @@
48454845
"Instructions": {
48464846
"OperatorType": "Aggregate",
48474847
"Variant": "Scalar",
4848-
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
4848+
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1|4) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
4849+
"ResultColumns": 4,
48494850
"Inputs": [
48504851
{
48514852
"OperatorType": "Route",
@@ -4854,9 +4855,9 @@
48544855
"Name": "user",
48554856
"Sharded": true
48564857
},
4857-
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by textcol1",
4858+
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by textcol1, weight_string(textcol2)",
48584859
"OrderBy": "2 ASC COLLATE latin1_swedish_ci",
4859-
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by textcol1 order by textcol1 asc",
4860+
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by textcol1, weight_string(textcol2) order by textcol1 asc",
48604861
"Table": "`user`"
48614862
}
48624863
]
@@ -4875,8 +4876,9 @@
48754876
"Instructions": {
48764877
"OperatorType": "Aggregate",
48774878
"Variant": "Ordered",
4878-
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
4879+
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2|5) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
48794880
"GroupBy": "0",
4881+
"ResultColumns": 5,
48804882
"Inputs": [
48814883
{
48824884
"OperatorType": "Route",
@@ -4885,9 +4887,9 @@
48854887
"Name": "user",
48864888
"Sharded": true
48874889
},
4888-
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by col, textcol1",
4890+
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by col, textcol1, weight_string(textcol2)",
48894891
"OrderBy": "0 ASC, 3 ASC COLLATE latin1_swedish_ci",
4890-
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by col, textcol1 order by col asc, textcol1 asc",
4892+
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by col, textcol1, weight_string(textcol2) order by col asc, textcol1 asc",
48914893
"Table": "`user`"
48924894
}
48934895
]

go/vt/vtgate/planbuilder/testdata/postprocess_cases.json

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,9 @@
342342
"Name": "user",
343343
"Sharded": true
344344
},
345-
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` where 1 != 1",
346-
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, 3 ASC COLLATE latin1_swedish_ci",
347-
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
345+
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` where 1 != 1",
346+
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, (3|6) ASC COLLATE ",
347+
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
348348
"ResultColumns": 4,
349349
"Table": "`user`"
350350
},
@@ -2174,5 +2174,36 @@
21742174
"user.user"
21752175
]
21762176
}
2177+
},
2178+
{
2179+
"comment": "DISTINCT on an unsupported collation should fall back on weightstrings",
2180+
"query": "select distinct textcol2 from user",
2181+
"plan": {
2182+
"QueryType": "SELECT",
2183+
"Original": "select distinct textcol2 from user",
2184+
"Instructions": {
2185+
"OperatorType": "Distinct",
2186+
"Collations": [
2187+
"(0:1): "
2188+
],
2189+
"ResultColumns": 1,
2190+
"Inputs": [
2191+
{
2192+
"OperatorType": "Route",
2193+
"Variant": "Scatter",
2194+
"Keyspace": {
2195+
"Name": "user",
2196+
"Sharded": true
2197+
},
2198+
"FieldQuery": "select textcol2, weight_string(textcol2) from `user` where 1 != 1",
2199+
"Query": "select distinct textcol2, weight_string(textcol2) from `user`",
2200+
"Table": "`user`"
2201+
}
2202+
]
2203+
},
2204+
"TablesUsed": [
2205+
"user.user"
2206+
]
2207+
}
21772208
}
21782209
]

go/vt/vtgate/semantics/analyzer.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,7 @@ func Analyze(statement sqlparser.Statement, currentDb string, si SchemaInformati
7474
}
7575

7676
// Creation of the semantic table
77-
semTable := analyzer.newSemTable(statement, si.ConnCollation())
78-
79-
return semTable, nil
77+
return analyzer.newSemTable(statement, si.ConnCollation())
8078
}
8179

8280
// AnalyzeStrict analyzes the parsed query, and fails the analysis for any possible errors
@@ -96,7 +94,10 @@ func AnalyzeStrict(statement sqlparser.Statement, currentDb string, si SchemaInf
9694
return st, nil
9795
}
9896

99-
func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID) *SemTable {
97+
func (a *analyzer) newSemTable(
98+
statement sqlparser.Statement,
99+
coll collations.ID,
100+
) (*SemTable, error) {
100101
var comments *sqlparser.ParsedComments
101102
commentedStmt, isCommented := statement.(sqlparser.Commented)
102103
if isCommented {
@@ -122,7 +123,7 @@ func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID
122123
columns: columns,
123124
StatementIDs: a.scoper.statementIDs,
124125
QuerySignature: a.sig,
125-
}
126+
}, nil
126127
}
127128

128129
func (a *analyzer) setError(err error) {

go/vt/vtgate/semantics/analyzer_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,8 +1506,13 @@ func fakeSchemaInfo() *FakeSI {
15061506
Name: sqlparser.NewIdentifierCI("uid"),
15071507
Type: querypb.Type_INT64,
15081508
}, {
1509-
Name: sqlparser.NewIdentifierCI("name"),
1510-
Type: querypb.Type_VARCHAR,
1509+
Name: sqlparser.NewIdentifierCI("name"),
1510+
Type: querypb.Type_VARCHAR,
1511+
CollationName: "utf8_bin",
1512+
}, {
1513+
Name: sqlparser.NewIdentifierCI("textcol"),
1514+
Type: querypb.Type_VARCHAR,
1515+
CollationName: "big5_bin",
15111516
}}
15121517

15131518
si := &FakeSI{

go/vt/vtgate/semantics/real_table.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,8 @@ func vindexTableToColumnInfo(tbl *vindexes.Table) []ColumnInfo {
105105
cols := make([]ColumnInfo, 0, len(tbl.Columns))
106106
for _, col := range tbl.Columns {
107107
collation := collations.DefaultCollationForType(col.Type)
108-
if sqltypes.IsText(col.Type) {
109-
coll, found := collations.Local().LookupID(col.CollationName)
110-
if found {
111-
collation = coll
112-
}
108+
if sqltypes.IsText(col.Type) && col.CollationName != "" {
109+
collation, _ = collations.Local().LookupID(col.CollationName)
113110
}
114111

115112
cols = append(cols, ColumnInfo{

go/vt/vtgate/semantics/semantic_state.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,12 @@ func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool {
351351
if !found {
352352
return true
353353
}
354-
return typ.Collation == collations.Unknown && !sqltypes.IsNumber(typ.Type)
354+
355+
if sqltypes.IsNumber(typ.Type) {
356+
return false
357+
}
358+
359+
return !collations.Local().IsSupported(typ.Collation)
355360
}
356361
}
357362

go/vt/vtgate/semantics/typer_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121

2222
"github.com/stretchr/testify/require"
2323

24+
"vitess.io/vitess/go/mysql/collations/colldata"
2425
querypb "vitess.io/vitess/go/vt/proto/query"
2526
"vitess.io/vitess/go/vt/sqlparser"
2627
)
@@ -54,5 +55,40 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) {
5455
require.Equal(t, test.typ, typ.Type.String())
5556
})
5657
}
58+
}
59+
60+
// Tests that the types correctly picks up and sets the collation on columns
61+
func TestColumnCollations(t *testing.T) {
62+
tests := []struct {
63+
query, collation string
64+
}{
65+
{query: "select textcol from t2"},
66+
{query: "select name from t2", collation: "utf8mb3_bin"},
67+
}
68+
69+
for _, test := range tests {
70+
t.Run(test.query, func(t *testing.T) {
71+
parse, err := sqlparser.Parse(test.query)
72+
require.NoError(t, err)
5773

74+
err = sqlparser.Normalize(parse, sqlparser.NewReservedVars("bv", sqlparser.BindVars{}), map[string]*querypb.BindVariable{})
75+
require.NoError(t, err)
76+
77+
st, err := Analyze(parse, "d", fakeSchemaInfo())
78+
require.NoError(t, err)
79+
col := extract(parse.(*sqlparser.Select), 0)
80+
typ, coll, found := st.TypeForExpr(col)
81+
require.True(t, found, "column was not typed")
82+
83+
require.Equal(t, "VARCHAR", typ.String())
84+
collation := colldata.Lookup(coll)
85+
if test.collation != "" {
86+
collation := colldata.Lookup(coll)
87+
require.NotNil(t, collation)
88+
require.Equal(t, test.collation, collation.Name())
89+
} else {
90+
require.Nil(t, collation)
91+
}
92+
})
93+
}
5894
}

go/vt/vtgate/vindexes/vschema.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,15 @@ import (
2525
"strings"
2626
"time"
2727

28-
"vitess.io/vitess/go/sqlescape"
29-
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
30-
"vitess.io/vitess/go/vt/vterrors"
31-
3228
"vitess.io/vitess/go/json2"
29+
"vitess.io/vitess/go/sqlescape"
3330
"vitess.io/vitess/go/sqltypes"
34-
"vitess.io/vitess/go/vt/sqlparser"
35-
3631
querypb "vitess.io/vitess/go/vt/proto/query"
3732
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
3833
vschemapb "vitess.io/vitess/go/vt/proto/vschema"
34+
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
35+
"vitess.io/vitess/go/vt/sqlparser"
36+
"vitess.io/vitess/go/vt/vterrors"
3937
)
4038

4139
// TabletTypeSuffix maps the tablet type to its suffix string.

0 commit comments

Comments
 (0)