From ebcc0fea222a00a9b9cdcf0f4aed961bedc07574 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Tue, 6 Feb 2024 13:18:39 +0100 Subject: [PATCH 1/2] Cherry-pick 2c7b647 with conflicts solved Signed-off-by: Andres Taylor --- go/mysql/collations/env.go | 5 +++ go/vt/vtgate/engine/aggregations.go | 3 +- go/vt/vtgate/executor_select_test.go | 19 ++++------ go/vt/vtgate/planbuilder/collations_test.go | 3 +- go/vt/vtgate/planbuilder/plan_test.go | 7 +++- .../planbuilder/testdata/aggr_cases.json | 14 ++++--- .../testdata/postprocess_cases.json | 37 +++++++++++++++++-- go/vt/vtgate/semantics/analyzer.go | 11 +++--- go/vt/vtgate/semantics/analyzer_test.go | 9 ++++- go/vt/vtgate/semantics/real_table.go | 7 +--- go/vt/vtgate/semantics/semantic_state.go | 7 +++- go/vt/vtgate/semantics/typer_test.go | 36 ++++++++++++++++++ go/vt/vtgate/vindexes/vschema.go | 10 ++--- 13 files changed, 124 insertions(+), 44 deletions(-) diff --git a/go/mysql/collations/env.go b/go/mysql/collations/env.go index 91fc2a8bd8c..878721ecf29 100644 --- a/go/mysql/collations/env.go +++ b/go/mysql/collations/env.go @@ -301,3 +301,8 @@ func (env *Environment) LookupByCharset(name string) *colldefaults { func (env *Environment) LookupCharsetName(coll ID) string { return env.byCharsetName[coll] } + +func (env *Environment) IsSupported(coll ID) bool { + _, supported := env.byID[coll] + return supported +} diff --git a/go/vt/vtgate/engine/aggregations.go b/go/vt/vtgate/engine/aggregations.go index 8037dda37a9..63634adb87c 100644 --- a/go/vt/vtgate/engine/aggregations.go +++ b/go/vt/vtgate/engine/aggregations.go @@ -58,7 +58,6 @@ func NewAggregateParam(opcode AggregateOpcode, col int, alias string) *Aggregate Col: col, Alias: alias, WCol: -1, - Type: sqltypes.Unknown, } if opcode.NeedsComparableValues() { out.KeyCol = col @@ -75,7 +74,7 @@ func (ap *AggregateParams) String() string { if ap.WAssigned() { keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol) } - if sqltypes.IsText(ap.Type) && ap.CollationID != collations.Unknown { + if sqltypes.IsText(ap.Type) && collations.Local().IsSupported(ap.CollationID) { keyCol += " COLLATE " + collations.Local().LookupName(ap.CollationID) } dispOrigOp := "" diff --git a/go/vt/vtgate/executor_select_test.go b/go/vt/vtgate/executor_select_test.go index a1512b55b92..37702a52f2d 100644 --- a/go/vt/vtgate/executor_select_test.go +++ b/go/vt/vtgate/executor_select_test.go @@ -26,28 +26,25 @@ import ( "testing" "time" - _flag "vitess.io/vitess/go/internal/flag" - "vitess.io/vitess/go/mysql/collations" - "vitess.io/vitess/go/streamlog" - "vitess.io/vitess/go/vt/vtgate/logstats" - - "vitess.io/vitess/go/vt/sqlparser" - "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + _flag "vitess.io/vitess/go/internal/flag" + "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/streamlog" "vitess.io/vitess/go/test/utils" "vitess.io/vitess/go/vt/discovery" - "vitess.io/vitess/go/vt/vterrors" - _ "vitess.io/vitess/go/vt/vtgate/vindexes" - "vitess.io/vitess/go/vt/vttablet/sandboxconn" - querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" vtgatepb "vitess.io/vitess/go/vt/proto/vtgate" vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/vtgate/logstats" + _ "vitess.io/vitess/go/vt/vtgate/vindexes" + "vitess.io/vitess/go/vt/vttablet/sandboxconn" ) func TestSelectNext(t *testing.T) { diff --git a/go/vt/vtgate/planbuilder/collations_test.go b/go/vt/vtgate/planbuilder/collations_test.go index 24fb038b4c2..c845200d46d 100644 --- a/go/vt/vtgate/planbuilder/collations_test.go +++ b/go/vt/vtgate/planbuilder/collations_test.go @@ -20,11 +20,10 @@ import ( "fmt" "testing" - "vitess.io/vitess/go/test/vschemawrapper" - "github.com/stretchr/testify/require" "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/test/vschemawrapper" "vitess.io/vitess/go/vt/vtgate/engine" ) diff --git a/go/vt/vtgate/planbuilder/plan_test.go b/go/vt/vtgate/planbuilder/plan_test.go index e563926269a..a7450db3a05 100644 --- a/go/vt/vtgate/planbuilder/plan_test.go +++ b/go/vt/vtgate/planbuilder/plan_test.go @@ -501,7 +501,12 @@ func loadSchema(t testing.TB, filename string, setCollation bool) *vindexes.VSch for _, table := range ks.Tables { for i, col := range table.Columns { if sqltypes.IsText(col.Type) { - table.Columns[i].CollationName = "latin1_swedish_ci" + switch col.Name.String() { + case "textcol2": + table.Columns[i].CollationName = "big5_bin" + default: + table.Columns[i].CollationName = "latin1_swedish_ci" + } } } } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index a59e289a7e9..c8d0baa7b2d 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -4845,7 +4845,8 @@ "Instructions": { "OperatorType": "Aggregate", "Variant": "Scalar", - "Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", + "Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1|4) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", + "ResultColumns": 4, "Inputs": [ { "OperatorType": "Route", @@ -4854,9 +4855,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by textcol1", + "FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by textcol1, weight_string(textcol2)", "OrderBy": "2 ASC COLLATE latin1_swedish_ci", - "Query": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by textcol1 order by textcol1 asc", + "Query": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by textcol1, weight_string(textcol2) order by textcol1 asc", "Table": "`user`" } ] @@ -4875,8 +4876,9 @@ "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", - "Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", + "Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2|5) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", "GroupBy": "0", + "ResultColumns": 5, "Inputs": [ { "OperatorType": "Route", @@ -4885,9 +4887,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by col, textcol1", + "FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by col, textcol1, weight_string(textcol2)", "OrderBy": "0 ASC, 3 ASC COLLATE latin1_swedish_ci", - "Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by col, textcol1 order by col asc, textcol1 asc", + "Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by col, textcol1, weight_string(textcol2) order by col asc, textcol1 asc", "Table": "`user`" } ] diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json index 622eda14832..097877c14a5 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json @@ -342,9 +342,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` where 1 != 1", - "OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, 3 ASC COLLATE latin1_swedish_ci", - "Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc", + "FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` where 1 != 1", + "OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, (3|6) ASC COLLATE ", + "Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc", "ResultColumns": 4, "Table": "`user`" }, @@ -2174,5 +2174,36 @@ "user.user" ] } + }, + { + "comment": "DISTINCT on an unsupported collation should fall back on weightstrings", + "query": "select distinct textcol2 from user", + "plan": { + "QueryType": "SELECT", + "Original": "select distinct textcol2 from user", + "Instructions": { + "OperatorType": "Distinct", + "Collations": [ + "(0:1): " + ], + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select textcol2, weight_string(textcol2) from `user` where 1 != 1", + "Query": "select distinct textcol2, weight_string(textcol2) from `user`", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } } ] diff --git a/go/vt/vtgate/semantics/analyzer.go b/go/vt/vtgate/semantics/analyzer.go index 1cb457f6882..e4d566d7191 100644 --- a/go/vt/vtgate/semantics/analyzer.go +++ b/go/vt/vtgate/semantics/analyzer.go @@ -74,9 +74,7 @@ func Analyze(statement sqlparser.Statement, currentDb string, si SchemaInformati } // Creation of the semantic table - semTable := analyzer.newSemTable(statement, si.ConnCollation()) - - return semTable, nil + return analyzer.newSemTable(statement, si.ConnCollation()) } // AnalyzeStrict analyzes the parsed query, and fails the analysis for any possible errors @@ -96,7 +94,10 @@ func AnalyzeStrict(statement sqlparser.Statement, currentDb string, si SchemaInf return st, nil } -func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID) *SemTable { +func (a *analyzer) newSemTable( + statement sqlparser.Statement, + coll collations.ID, +) (*SemTable, error) { var comments *sqlparser.ParsedComments commentedStmt, isCommented := statement.(sqlparser.Commented) if isCommented { @@ -122,7 +123,7 @@ func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID columns: columns, StatementIDs: a.scoper.statementIDs, QuerySignature: a.sig, - } + }, nil } func (a *analyzer) setError(err error) { diff --git a/go/vt/vtgate/semantics/analyzer_test.go b/go/vt/vtgate/semantics/analyzer_test.go index 21222da2263..fc372909f7c 100644 --- a/go/vt/vtgate/semantics/analyzer_test.go +++ b/go/vt/vtgate/semantics/analyzer_test.go @@ -1506,8 +1506,13 @@ func fakeSchemaInfo() *FakeSI { Name: sqlparser.NewIdentifierCI("uid"), Type: querypb.Type_INT64, }, { - Name: sqlparser.NewIdentifierCI("name"), - Type: querypb.Type_VARCHAR, + Name: sqlparser.NewIdentifierCI("name"), + Type: querypb.Type_VARCHAR, + CollationName: "utf8_bin", + }, { + Name: sqlparser.NewIdentifierCI("textcol"), + Type: querypb.Type_VARCHAR, + CollationName: "big5_bin", }} si := &FakeSI{ diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index bd57ab81474..2d41653bdf3 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -105,11 +105,8 @@ func vindexTableToColumnInfo(tbl *vindexes.Table) []ColumnInfo { cols := make([]ColumnInfo, 0, len(tbl.Columns)) for _, col := range tbl.Columns { collation := collations.DefaultCollationForType(col.Type) - if sqltypes.IsText(col.Type) { - coll, found := collations.Local().LookupID(col.CollationName) - if found { - collation = coll - } + if sqltypes.IsText(col.Type) && col.CollationName != "" { + collation, _ = collations.Local().LookupID(col.CollationName) } cols = append(cols, ColumnInfo{ diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go index 0af935918f9..13fd11d961f 100644 --- a/go/vt/vtgate/semantics/semantic_state.go +++ b/go/vt/vtgate/semantics/semantic_state.go @@ -351,7 +351,12 @@ func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool { if !found { return true } - return typ.Collation == collations.Unknown && !sqltypes.IsNumber(typ.Type) + + if sqltypes.IsNumber(typ.Type) { + return false + } + + return !collations.Local().IsSupported(typ.Collation) } } diff --git a/go/vt/vtgate/semantics/typer_test.go b/go/vt/vtgate/semantics/typer_test.go index 4c77e6f5657..8933b483a2e 100644 --- a/go/vt/vtgate/semantics/typer_test.go +++ b/go/vt/vtgate/semantics/typer_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql/collations/colldata" querypb "vitess.io/vitess/go/vt/proto/query" "vitess.io/vitess/go/vt/sqlparser" ) @@ -54,5 +55,40 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) { require.Equal(t, test.typ, typ.Type.String()) }) } +} + +// Tests that the types correctly picks up and sets the collation on columns +func TestColumnCollations(t *testing.T) { + tests := []struct { + query, collation string + }{ + {query: "select textcol from t2"}, + {query: "select name from t2", collation: "utf8mb3_bin"}, + } + + for _, test := range tests { + t.Run(test.query, func(t *testing.T) { + parse, err := sqlparser.Parse(test.query) + require.NoError(t, err) + err = sqlparser.Normalize(parse, sqlparser.NewReservedVars("bv", sqlparser.BindVars{}), map[string]*querypb.BindVariable{}) + require.NoError(t, err) + + st, err := Analyze(parse, "d", fakeSchemaInfo()) + require.NoError(t, err) + col := extract(parse.(*sqlparser.Select), 0) + typ, coll, found := st.TypeForExpr(col) + require.True(t, found, "column was not typed") + + require.Equal(t, "VARCHAR", typ.String()) + collation := colldata.Lookup(coll) + if test.collation != "" { + collation := colldata.Lookup(coll) + require.NotNil(t, collation) + require.Equal(t, test.collation, collation.Name()) + } else { + require.Nil(t, collation) + } + }) + } } diff --git a/go/vt/vtgate/vindexes/vschema.go b/go/vt/vtgate/vindexes/vschema.go index de57908adb9..709742175fb 100644 --- a/go/vt/vtgate/vindexes/vschema.go +++ b/go/vt/vtgate/vindexes/vschema.go @@ -25,17 +25,15 @@ import ( "strings" "time" - "vitess.io/vitess/go/sqlescape" - vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" - "vitess.io/vitess/go/vt/vterrors" - "vitess.io/vitess/go/json2" + "vitess.io/vitess/go/sqlescape" "vitess.io/vitess/go/sqltypes" - "vitess.io/vitess/go/vt/sqlparser" - querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" vschemapb "vitess.io/vitess/go/vt/proto/vschema" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vterrors" ) // TabletTypeSuffix maps the tablet type to its suffix string. From 2adb299789ccf6fbfe36f2b9fc305c0b68332545 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Tue, 6 Feb 2024 13:34:58 +0100 Subject: [PATCH 2/2] Empty commit to trigger CI Signed-off-by: Andres Taylor