Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-18.0] make sure to handle unsupported collations well (#15134) #15142

Merged
merged 2 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,8 @@ func (env *Environment) LookupByCharset(name string) *colldefaults {
func (env *Environment) LookupCharsetName(coll ID) string {
return env.byCharsetName[coll]
}

func (env *Environment) IsSupported(coll ID) bool {
_, supported := env.byID[coll]
return supported
}
3 changes: 1 addition & 2 deletions go/vt/vtgate/engine/aggregations.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ func NewAggregateParam(opcode AggregateOpcode, col int, alias string) *Aggregate
Col: col,
Alias: alias,
WCol: -1,
Type: sqltypes.Unknown,
}
if opcode.NeedsComparableValues() {
out.KeyCol = col
Expand All @@ -75,7 +74,7 @@ func (ap *AggregateParams) String() string {
if ap.WAssigned() {
keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol)
}
if sqltypes.IsText(ap.Type) && ap.CollationID != collations.Unknown {
if sqltypes.IsText(ap.Type) && collations.Local().IsSupported(ap.CollationID) {
keyCol += " COLLATE " + collations.Local().LookupName(ap.CollationID)
}
dispOrigOp := ""
Expand Down
19 changes: 8 additions & 11 deletions go/vt/vtgate/executor_select_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,25 @@ import (
"testing"
"time"

_flag "vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/streamlog"
"vitess.io/vitess/go/vt/vtgate/logstats"

"vitess.io/vitess/go/vt/sqlparser"

"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

_flag "vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/streamlog"
"vitess.io/vitess/go/test/utils"
"vitess.io/vitess/go/vt/discovery"
"vitess.io/vitess/go/vt/vterrors"
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/sandboxconn"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtgatepb "vitess.io/vitess/go/vt/proto/vtgate"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vtgate/logstats"
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/sandboxconn"
)

func TestSelectNext(t *testing.T) {
Expand Down
3 changes: 1 addition & 2 deletions go/vt/vtgate/planbuilder/collations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ import (
"fmt"
"testing"

"vitess.io/vitess/go/test/vschemawrapper"

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/test/vschemawrapper"
"vitess.io/vitess/go/vt/vtgate/engine"
)

Expand Down
7 changes: 6 additions & 1 deletion go/vt/vtgate/planbuilder/plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,12 @@ func loadSchema(t testing.TB, filename string, setCollation bool) *vindexes.VSch
for _, table := range ks.Tables {
for i, col := range table.Columns {
if sqltypes.IsText(col.Type) {
table.Columns[i].CollationName = "latin1_swedish_ci"
switch col.Name.String() {
case "textcol2":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In v18, the proto files don't contain the collation name, so I can't set it on schema.json as I did on main. This is the hack I did to get collations in.

table.Columns[i].CollationName = "big5_bin"
default:
table.Columns[i].CollationName = "latin1_swedish_ci"
}
}
}
}
Expand Down
14 changes: 8 additions & 6 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4845,7 +4845,8 @@
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1|4) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"ResultColumns": 4,
"Inputs": [
{
"OperatorType": "Route",
Expand All @@ -4854,9 +4855,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by textcol1",
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by textcol1, weight_string(textcol2)",
"OrderBy": "2 ASC COLLATE latin1_swedish_ci",
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by textcol1 order by textcol1 asc",
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by textcol1, weight_string(textcol2) order by textcol1 asc",
"Table": "`user`"
}
]
Expand All @@ -4875,8 +4876,9 @@
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2|5) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"GroupBy": "0",
"ResultColumns": 5,
"Inputs": [
{
"OperatorType": "Route",
Expand All @@ -4885,9 +4887,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by col, textcol1",
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by col, textcol1, weight_string(textcol2)",
"OrderBy": "0 ASC, 3 ASC COLLATE latin1_swedish_ci",
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by col, textcol1 order by col asc, textcol1 asc",
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by col, textcol1, weight_string(textcol2) order by col asc, textcol1 asc",
"Table": "`user`"
}
]
Expand Down
37 changes: 34 additions & 3 deletions go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` where 1 != 1",
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, 3 ASC COLLATE latin1_swedish_ci",
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` where 1 != 1",
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, (3|6) ASC COLLATE ",
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
"ResultColumns": 4,
"Table": "`user`"
},
Expand Down Expand Up @@ -2174,5 +2174,36 @@
"user.user"
]
}
},
{
"comment": "DISTINCT on an unsupported collation should fall back on weightstrings",
"query": "select distinct textcol2 from user",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct textcol2 from user",
"Instructions": {
"OperatorType": "Distinct",
"Collations": [
"(0:1): "
],
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select textcol2, weight_string(textcol2) from `user` where 1 != 1",
"Query": "select distinct textcol2, weight_string(textcol2) from `user`",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
}
]
11 changes: 6 additions & 5 deletions go/vt/vtgate/semantics/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,7 @@ func Analyze(statement sqlparser.Statement, currentDb string, si SchemaInformati
}

// Creation of the semantic table
semTable := analyzer.newSemTable(statement, si.ConnCollation())

return semTable, nil
return analyzer.newSemTable(statement, si.ConnCollation())
}

// AnalyzeStrict analyzes the parsed query, and fails the analysis for any possible errors
Expand All @@ -96,7 +94,10 @@ func AnalyzeStrict(statement sqlparser.Statement, currentDb string, si SchemaInf
return st, nil
}

func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID) *SemTable {
func (a *analyzer) newSemTable(
statement sqlparser.Statement,
coll collations.ID,
) (*SemTable, error) {
var comments *sqlparser.ParsedComments
commentedStmt, isCommented := statement.(sqlparser.Commented)
if isCommented {
Expand All @@ -122,7 +123,7 @@ func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID
columns: columns,
StatementIDs: a.scoper.statementIDs,
QuerySignature: a.sig,
}
}, nil
}

func (a *analyzer) setError(err error) {
Expand Down
9 changes: 7 additions & 2 deletions go/vt/vtgate/semantics/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1506,8 +1506,13 @@ func fakeSchemaInfo() *FakeSI {
Name: sqlparser.NewIdentifierCI("uid"),
Type: querypb.Type_INT64,
}, {
Name: sqlparser.NewIdentifierCI("name"),
Type: querypb.Type_VARCHAR,
Name: sqlparser.NewIdentifierCI("name"),
Type: querypb.Type_VARCHAR,
CollationName: "utf8_bin",
}, {
Name: sqlparser.NewIdentifierCI("textcol"),
Type: querypb.Type_VARCHAR,
CollationName: "big5_bin",
}}

si := &FakeSI{
Expand Down
7 changes: 2 additions & 5 deletions go/vt/vtgate/semantics/real_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,8 @@ func vindexTableToColumnInfo(tbl *vindexes.Table) []ColumnInfo {
cols := make([]ColumnInfo, 0, len(tbl.Columns))
for _, col := range tbl.Columns {
collation := collations.DefaultCollationForType(col.Type)
if sqltypes.IsText(col.Type) {
coll, found := collations.Local().LookupID(col.CollationName)
if found {
collation = coll
}
if sqltypes.IsText(col.Type) && col.CollationName != "" {
collation, _ = collations.Local().LookupID(col.CollationName)
}

cols = append(cols, ColumnInfo{
Expand Down
7 changes: 6 additions & 1 deletion go/vt/vtgate/semantics/semantic_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,12 @@ func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool {
if !found {
return true
}
return typ.Collation == collations.Unknown && !sqltypes.IsNumber(typ.Type)

if sqltypes.IsNumber(typ.Type) {
return false
}

return !collations.Local().IsSupported(typ.Collation)
}
}

Expand Down
36 changes: 36 additions & 0 deletions go/vt/vtgate/semantics/typer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql/collations/colldata"
querypb "vitess.io/vitess/go/vt/proto/query"
"vitess.io/vitess/go/vt/sqlparser"
)
Expand Down Expand Up @@ -54,5 +55,40 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) {
require.Equal(t, test.typ, typ.Type.String())
})
}
}

// Tests that the types correctly picks up and sets the collation on columns
func TestColumnCollations(t *testing.T) {
tests := []struct {
query, collation string
}{
{query: "select textcol from t2"},
{query: "select name from t2", collation: "utf8mb3_bin"},
}

for _, test := range tests {
t.Run(test.query, func(t *testing.T) {
parse, err := sqlparser.Parse(test.query)
require.NoError(t, err)

err = sqlparser.Normalize(parse, sqlparser.NewReservedVars("bv", sqlparser.BindVars{}), map[string]*querypb.BindVariable{})
require.NoError(t, err)

st, err := Analyze(parse, "d", fakeSchemaInfo())
require.NoError(t, err)
col := extract(parse.(*sqlparser.Select), 0)
typ, coll, found := st.TypeForExpr(col)
require.True(t, found, "column was not typed")

require.Equal(t, "VARCHAR", typ.String())
collation := colldata.Lookup(coll)
if test.collation != "" {
collation := colldata.Lookup(coll)
require.NotNil(t, collation)
require.Equal(t, test.collation, collation.Name())
} else {
require.Nil(t, collation)
}
})
}
}
10 changes: 4 additions & 6 deletions go/vt/vtgate/vindexes/vschema.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,15 @@ import (
"strings"
"time"

"vitess.io/vitess/go/sqlescape"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/vterrors"

"vitess.io/vitess/go/json2"
"vitess.io/vitess/go/sqlescape"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/sqlparser"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vschemapb "vitess.io/vitess/go/vt/proto/vschema"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/vterrors"
)

// TabletTypeSuffix maps the tablet type to its suffix string.
Expand Down
Loading