From 9df4b66550e46b5d7079e21ed0e1b0f49f92b055 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Wed, 8 May 2024 12:21:33 +0200 Subject: [PATCH] Merge pull request from GHSA-649x-hxfx-57j2 * collations: Fix OOM and handle padding for multibyte This fixes the OOM issue where a simple query can trigger a denial of service attack. It also ensures we return the right result for these queries by doing the correct padding. Signed-off-by: Dirkjan Bussink * Address review comments Signed-off-by: Dirkjan Bussink --------- Signed-off-by: Dirkjan Bussink --- go/mysql/collations/charset/convert.go | 2 +- go/mysql/collations/charset/helpers.go | 2 +- go/mysql/collations/charset/unicode/utf16.go | 6 +-- go/mysql/collations/charset/unicode/utf32.go | 2 +- go/vt/vtgate/evalengine/compiler_asm.go | 3 +- go/vt/vtgate/evalengine/compiler_test.go | 24 ++++++++++ go/vt/vtgate/evalengine/expr_collate.go | 46 ++++++++++++++++---- go/vt/vtgate/evalengine/translate.go | 2 +- 8 files changed, 71 insertions(+), 16 deletions(-) diff --git a/go/mysql/collations/charset/convert.go b/go/mysql/collations/charset/convert.go index bc51e9b8377..261ef7c9b4a 100644 --- a/go/mysql/collations/charset/convert.go +++ b/go/mysql/collations/charset/convert.go @@ -72,7 +72,7 @@ func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) for len(src) > 0 { cp, width := srcCharset.DecodeRune(src) - if cp == utf8.RuneError && width < 3 { + if cp == utf8.RuneError { failed++ cp = '?' 
} diff --git a/go/mysql/collations/charset/helpers.go b/go/mysql/collations/charset/helpers.go index 851ce4bebf9..b66a6c77b87 100644 --- a/go/mysql/collations/charset/helpers.go +++ b/go/mysql/collations/charset/helpers.go @@ -41,7 +41,7 @@ func Validate(charset Charset, input []byte) bool { } for len(input) > 0 { r, size := charset.DecodeRune(input) - if r == RuneError && size < 2 { + if r == RuneError { return false } input = input[size:] diff --git a/go/mysql/collations/charset/unicode/utf16.go b/go/mysql/collations/charset/unicode/utf16.go index eb055db7382..924c12be7b7 100644 --- a/go/mysql/collations/charset/unicode/utf16.go +++ b/go/mysql/collations/charset/unicode/utf16.go @@ -67,7 +67,7 @@ func (Charset_utf16be) EncodeRune(dst []byte, r rune) int { func (Charset_utf16be) DecodeRune(b []byte) (rune, int) { if len(b) < 2 { - return utf8.RuneError, 0 + return utf8.RuneError, len(b) } r1 := uint16(b[1]) | uint16(b[0])<<8 @@ -129,7 +129,7 @@ func (Charset_utf16le) EncodeRune(dst []byte, r rune) int { func (Charset_utf16le) DecodeRune(b []byte) (rune, int) { if len(b) < 2 { - return utf8.RuneError, 0 + return utf8.RuneError, len(b) } r1 := uint16(b[0]) | uint16(b[1])<<8 @@ -185,7 +185,7 @@ func (Charset_ucs2) EncodeRune(dst []byte, r rune) int { func (Charset_ucs2) DecodeRune(p []byte) (rune, int) { if len(p) < 2 { - return utf8.RuneError, 0 + return utf8.RuneError, len(p) } return rune(p[0])<<8 | rune(p[1]), 2 } diff --git a/go/mysql/collations/charset/unicode/utf32.go b/go/mysql/collations/charset/unicode/utf32.go index 97095bb7f98..6053d7d10f8 100644 --- a/go/mysql/collations/charset/unicode/utf32.go +++ b/go/mysql/collations/charset/unicode/utf32.go @@ -49,7 +49,7 @@ func (Charset_utf32) EncodeRune(dst []byte, r rune) int { func (Charset_utf32) DecodeRune(p []byte) (rune, int) { if len(p) < 4 { - return utf8.RuneError, 0 + return utf8.RuneError, len(p) } return (rune(p[0]) << 24) | (rune(p[1]) << 16) | (rune(p[2]) << 8) | rune(p[3]), 4 } diff --git 
a/go/vt/vtgate/evalengine/compiler_asm.go b/go/vt/vtgate/evalengine/compiler_asm.go index 2cda3ecb348..6c8896bb1f4 100644 --- a/go/vt/vtgate/evalengine/compiler_asm.go +++ b/go/vt/vtgate/evalengine/compiler_asm.go @@ -5068,7 +5068,8 @@ func (asm *assembler) Fn_REGEXP_REPLACE_slow(merged collations.TypedCollation, f func (asm *assembler) Introduce(offset int, t sqltypes.Type, col collations.TypedCollation) { asm.emit(func(env *ExpressionEnv) int { - arg := evalToBinary(env.vm.stack[env.vm.sp-offset]) + var arg *evalBytes + arg, env.vm.err = introducerCast(env.vm.stack[env.vm.sp-offset], col.Collation) arg.tt = int16(t) arg.col = col env.vm.stack[env.vm.sp-offset] = arg diff --git a/go/vt/vtgate/evalengine/compiler_test.go b/go/vt/vtgate/evalengine/compiler_test.go index b2d4ff0c2f0..04eb72ad4f2 100644 --- a/go/vt/vtgate/evalengine/compiler_test.go +++ b/go/vt/vtgate/evalengine/compiler_test.go @@ -699,6 +699,30 @@ func TestCompilerSingle(t *testing.T) { result: `DATETIME("2023-10-24 12:00:00.000000")`, typeWanted: evalengine.NewTypeEx(sqltypes.Datetime, collations.CollationBinaryID, false, 6, 0, nil), }, + { + expression: `convert(0xFF using utf16)`, + result: `VARCHAR("ÿ")`, + }, + { + expression: `_utf16 0xFF`, + result: `VARCHAR("ÿ")`, + }, + { + expression: `convert(0xFF using utf32)`, + result: `NULL`, + }, + { + expression: `cast(_utf32 0xFF as binary)`, + result: `VARBINARY("\x00\x00\x00\xff")`, + }, + { + expression: `cast(_utf32 0x00FF as binary)`, + result: `VARBINARY("\x00\x00\x00\xff")`, + }, + { + expression: `cast(_utf32 0x0000FF as binary)`, + result: `VARBINARY("\x00\x00\x00\xff")`, + }, } tz, _ := time.LoadLocation("Europe/Madrid") diff --git a/go/vt/vtgate/evalengine/expr_collate.go b/go/vt/vtgate/evalengine/expr_collate.go index bab0e5e52f9..be0eb78882b 100644 --- a/go/vt/vtgate/evalengine/expr_collate.go +++ b/go/vt/vtgate/evalengine/expr_collate.go @@ -18,6 +18,8 @@ package evalengine import ( "vitess.io/vitess/go/mysql/collations" + 
"vitess.io/vitess/go/mysql/collations/charset"
+	"vitess.io/vitess/go/mysql/collations/colldata"
 	"vitess.io/vitess/go/sqltypes"
 	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
 	"vitess.io/vitess/go/vt/vterrors"
@@ -131,20 +133,59 @@ func (expr *CollateExpr) compile(c *compiler) (ctype, error) {
 
 var _ IR = (*IntroducerExpr)(nil)
 
+// introducerCast reinterprets e as text in the collation selected by a
+// character set introducer (for example `_utf16 0xFF`).
+//
+// The binary collation is delegated to evalToBinary. For any other collation
+// the raw bytes of e are reused as-is, except that an *evalBytes input is
+// left-padded with zero bytes up to a multiple of the character width: 2 bytes
+// for Charset_utf16, Charset_utf16le and Charset_ucs2, 4 bytes for
+// Charset_utf32. The returned error is currently always nil.
+func introducerCast(e eval, col collations.ID) (*evalBytes, error) {
+	if col == collations.CollationBinaryID {
+		return evalToBinary(e), nil
+	}
+
+	var bytes []byte
+	if b, ok := e.(*evalBytes); !ok {
+		// e is not an *evalBytes here, so b is nil and must not be
+		// dereferenced; take the raw bytes from e itself instead.
+		// NOTE(review): assumes eval exposes ToRawBytes — confirm.
+		bytes = e.ToRawBytes()
+	} else {
+		cs := colldata.Lookup(col).Charset()
+		bytes = b.bytes
+		// We only need to pad here for encodings that have a minimum
+		// character byte width larger than 1, which is all UTF-16
+		// variations and UTF-32.
+		switch cs.(type) {
+		case charset.Charset_utf16, charset.Charset_utf16le, charset.Charset_ucs2:
+			if len(bytes)%2 != 0 {
+				bytes = append([]byte{0}, bytes...)
+			}
+		case charset.Charset_utf32:
+			if mod := len(bytes) % 4; mod != 0 {
+				bytes = append(make([]byte, 4-mod), bytes...)
+			}
+		}
+	}
+	typedcol := collations.TypedCollation{
+		Collation:    col,
+		Coercibility: collations.CoerceCoercible,
+		Repertoire:   collations.RepertoireASCII,
+	}
+	return newEvalText(bytes, typedcol), nil
+}
+
 func (expr *IntroducerExpr) eval(env *ExpressionEnv) (eval, error) {
 	e, err := expr.Inner.eval(env)
 	if err != nil {
 		return nil, err
 	}
 
-	var b *evalBytes
-	if expr.TypedCollation.Collation == collations.CollationBinaryID {
-		b = evalToBinary(e)
-	} else {
-		b, err = evalToVarchar(e, expr.TypedCollation.Collation, false)
-		if err != nil {
-			return nil, err
-		}
+	b, err := introducerCast(e, expr.TypedCollation.Collation)
+	if err != nil {
+		return nil, err
 	}
 	b.flag |= flagExplicitCollation
 	return b, nil
diff --git a/go/vt/vtgate/evalengine/translate.go b/go/vt/vtgate/evalengine/translate.go
index 99ffd956513..0091f06a633 100644
--- a/go/vt/vtgate/evalengine/translate.go
+++ b/go/vt/vtgate/evalengine/translate.go
@@ -373,7 +373,7 @@ func (ast *astCompiler) translateIntroducerExpr(introduced *sqlparser.Introducer
 		case collations.CollationBinaryID:
 			lit.inner = evalToBinary(lit.inner)
 		default:
-			lit.inner, err = evalToVarchar(lit.inner, collation, false)
+			lit.inner, err = introducerCast(lit.inner, collation)
 			if err != nil {
 				return nil, err
 			}