Skip to content

Commit f9ce09c

Browse files
committed
evalengine: Implement SUBSTRING
This implements `SUBSTRING` (and the `SUBSTR` alias) in the `evalengine`. It also makes some calls that were reported as unsupported more explicit by using a separate error for non-constant regexps: those cases are not really unsupported, we just compile a slower path for them. Finally, it implements some cases that are trivial to support. Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>
1 parent ce203d2 commit f9ce09c

File tree

10 files changed

+252
-9
lines changed

10 files changed

+252
-9
lines changed

go/vt/vtgate/evalengine/cached_size.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/vt/vtgate/evalengine/compiler_asm.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535

3636
"github.com/google/uuid"
3737

38+
"vitess.io/vitess/go/mysql/collations/charset/types"
3839
"vitess.io/vitess/go/mysql/collations/colldata"
3940

4041
"vitess.io/vitess/go/hack"
@@ -2725,6 +2726,72 @@ func (asm *assembler) Fn_TRIM2(col collations.TypedCollation) {
27252726
}, "FN TRIM VARCHAR(SP-2) VARCHAR(SP-1)")
27262727
}
27272728

2729+
func (asm *assembler) Fn_SUBSTRING2(tt sqltypes.Type, cs types.Charset, col collations.TypedCollation) {
2730+
asm.adjustStack(-1)
2731+
asm.emit(func(env *ExpressionEnv) int {
2732+
str := env.vm.stack[env.vm.sp-2].(*evalBytes)
2733+
pos := env.vm.stack[env.vm.sp-1].(*evalInt64)
2734+
2735+
end := int64(charset.Length(cs, str.bytes))
2736+
if pos.i < 0 {
2737+
pos.i += end + 1
2738+
}
2739+
if pos.i < 1 || pos.i > end {
2740+
str.tt = int16(tt)
2741+
str.bytes = nil
2742+
str.col = col
2743+
env.vm.sp--
2744+
return 1
2745+
}
2746+
2747+
res := charset.Slice(cs, str.bytes, int(pos.i-1), int(end))
2748+
str.tt = int16(tt)
2749+
str.bytes = res
2750+
str.col = col
2751+
env.vm.sp--
2752+
return 1
2753+
}, "FN SUBSTRING VARCHAR(SP-2) INT64(SP-1)")
2754+
}
2755+
2756+
func (asm *assembler) Fn_SUBSTRING3(tt sqltypes.Type, cs types.Charset, col collations.TypedCollation) {
2757+
asm.adjustStack(-2)
2758+
asm.emit(func(env *ExpressionEnv) int {
2759+
str := env.vm.stack[env.vm.sp-3].(*evalBytes)
2760+
pos := env.vm.stack[env.vm.sp-2].(*evalInt64)
2761+
ll := env.vm.stack[env.vm.sp-1].(*evalInt64)
2762+
2763+
end := int64(charset.Length(cs, str.bytes))
2764+
if pos.i < 0 {
2765+
pos.i += end + 1
2766+
}
2767+
if pos.i < 1 || pos.i > end {
2768+
str.tt = int16(tt)
2769+
str.bytes = nil
2770+
str.col = col
2771+
env.vm.sp -= 2
2772+
return 1
2773+
}
2774+
2775+
if ll.i < 1 {
2776+
str.tt = int16(tt)
2777+
str.bytes = nil
2778+
str.col = col
2779+
env.vm.sp -= 2
2780+
return 1
2781+
}
2782+
if ll.i > end-pos.i+1 {
2783+
ll.i = end - pos.i + 1
2784+
}
2785+
end = pos.i + ll.i - 1
2786+
res := charset.Slice(cs, str.bytes, int(pos.i-1), int(end))
2787+
str.tt = int16(tt)
2788+
str.bytes = res
2789+
str.col = col
2790+
env.vm.sp -= 2
2791+
return 1
2792+
}, "FN SUBSTRING VARCHAR(SP-3) INT64(SP-2) INT64(SP-1)")
2793+
}
2794+
27282795
func (asm *assembler) Fn_TO_BASE64(t sqltypes.Type, col collations.TypedCollation) {
27292796
asm.emit(func(env *ExpressionEnv) int {
27302797
str := env.vm.stack[env.vm.sp-1].(*evalBytes)

go/vt/vtgate/evalengine/expr_collate.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,12 @@ func (expr *CollateExpr) compile(c *compiler) (ctype, error) {
118118
case sqltypes.VarBinary:
119119
c.asm.Collate(expr.TypedCollation.Collation)
120120
default:
121-
return ctype{}, c.unsupported(expr)
121+
c.asm.Convert_xc(1, sqltypes.VarChar, expr.TypedCollation.Collation, 0, false)
122122
}
123123

124124
c.asm.jumpDestination(skip)
125125

126+
ct.Type = sqltypes.VarChar
126127
ct.Col = expr.TypedCollation
127128
ct.Flag |= flagExplicitCollation | flagNullable
128129
return ct, nil

go/vt/vtgate/evalengine/expr_compare.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ func (expr *InExpr) compile(c *compiler) (ctype, error) {
568568

569569
return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: flagIsBoolean | (nullableFlags(lhs.Flag) | (rt.Flag & flagNullable))}, nil
570570
case *BindVariable:
571-
return ctype{}, c.unsupported(expr)
571+
return ctype{}, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "rhs of an In operation should be a tuple")
572572
default:
573573
panic("unreachable")
574574
}

go/vt/vtgate/evalengine/expr_convert.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ func (conv *ConvertExpr) compile(c *compiler) (ctype, error) {
205205
convt = c.compileToFloat(arg, 1)
206206

207207
case "FLOAT":
208-
return ctype{}, c.unsupported(conv)
208+
return ctype{}, conv.returnUnsupportedError()
209209

210210
case "SIGNED", "SIGNED INTEGER":
211211
convt = c.compileToInt64(arg, 1)

go/vt/vtgate/evalengine/fn_compare.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ func (call *builtinMultiComparison) compile(c *compiler) (ctype, error) {
354354
case sqltypes.Null:
355355
nullable = true
356356
default:
357-
return ctype{}, c.unsupported(call)
357+
panic("unexpected argument type")
358358
}
359359
}
360360

go/vt/vtgate/evalengine/fn_regexp.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,12 @@ func compileRegex(pat eval, c colldata.Charset, flags icuregex.RegexpFlag) (*icu
210210
return nil, err
211211
}
212212

213+
// errNonConstantRegexp is returned by compileConstantRegex when the pattern or
// the flags argument is not a constant expression, or when the constant
// pattern literal has no inner value.
var errNonConstantRegexp = errors.New("non-constant regexp")
214+
213215
func compileConstantRegex(c *compiler, args TupleExpr, pat, mt int, cs collations.TypedCollation, flags icuregex.RegexpFlag, f string) (*icuregex.Pattern, error) {
214216
pattern := args[pat]
215217
if !pattern.constant() {
216-
return nil, c.unsupported(pattern)
218+
return nil, errNonConstantRegexp
217219
}
218220
var err error
219221
staticEnv := EmptyExpressionEnv(c.collationEnv)
@@ -225,7 +227,7 @@ func compileConstantRegex(c *compiler, args TupleExpr, pat, mt int, cs collation
225227
if len(args) > mt {
226228
fl := args[mt]
227229
if !fl.constant() {
228-
return nil, c.unsupported(fl)
230+
return nil, errNonConstantRegexp
229231
}
230232
fl, err = simplifyExpr(staticEnv, fl)
231233
if err != nil {
@@ -238,7 +240,7 @@ func compileConstantRegex(c *compiler, args TupleExpr, pat, mt int, cs collation
238240
}
239241

240242
if pattern.(*Literal).inner == nil {
241-
return nil, c.unsupported(pattern)
243+
return nil, errNonConstantRegexp
242244
}
243245

244246
innerPat, err := evalToVarchar(pattern.(*Literal).inner, cs.Collation, true)

go/vt/vtgate/evalengine/fn_string.go

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ type (
8989
collate collations.ID
9090
trim sqlparser.TrimType
9191
}
92+
93+
builtinSubstring struct {
94+
CallExpr
95+
collate collations.ID
96+
}
9297
)
9398

9499
var _ IR = (*builtinChangeCase)(nil)
@@ -817,7 +822,7 @@ func (expr *builtinStrcmp) compile(c *compiler) (ctype, error) {
817822
return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: nullableFlags(lt.Flag | rt.Flag)}, nil
818823
}
819824

820-
func (call builtinTrim) eval(env *ExpressionEnv) (eval, error) {
825+
func (call *builtinTrim) eval(env *ExpressionEnv) (eval, error) {
821826
str, err := call.arg1(env)
822827
if err != nil {
823828
return nil, err
@@ -872,7 +877,7 @@ func (call builtinTrim) eval(env *ExpressionEnv) (eval, error) {
872877
}
873878
}
874879

875-
func (call builtinTrim) compile(c *compiler) (ctype, error) {
880+
func (call *builtinTrim) compile(c *compiler) (ctype, error) {
876881
str, err := call.Arguments[0].compile(c)
877882
if err != nil {
878883
return ctype{}, err
@@ -932,6 +937,105 @@ func (call builtinTrim) compile(c *compiler) (ctype, error) {
932937
return ctype{Type: sqltypes.VarChar, Flag: flagNullable, Col: col}, nil
933938
}
934939

940+
func (call *builtinSubstring) eval(env *ExpressionEnv) (eval, error) {
941+
str, err := call.Arguments[0].eval(env)
942+
if err != nil || str == nil {
943+
return nil, err
944+
}
945+
946+
tt := str.SQLType()
947+
text, ok := str.(*evalBytes)
948+
if !ok {
949+
text, err = evalToVarchar(str, call.collate, true)
950+
if err != nil {
951+
return nil, err
952+
}
953+
tt = sqltypes.VarChar
954+
}
955+
956+
p, err := call.Arguments[1].eval(env)
957+
if err != nil || p == nil {
958+
return nil, err
959+
}
960+
961+
var l eval
962+
if len(call.Arguments) > 2 {
963+
l, err = call.Arguments[2].eval(env)
964+
if err != nil || l == nil {
965+
return nil, err
966+
}
967+
}
968+
969+
pos := evalToInt64(p).i
970+
if pos == 0 {
971+
return newEvalRaw(tt, nil, text.col), nil
972+
}
973+
cs := colldata.Lookup(text.col.Collation).Charset()
974+
end := int64(charset.Length(cs, text.bytes))
975+
976+
if pos < 0 {
977+
pos += end + 1
978+
}
979+
if pos < 1 || pos > end {
980+
return newEvalRaw(tt, nil, text.col), nil
981+
}
982+
983+
if len(call.Arguments) > 2 {
984+
ll := evalToInt64(l).i
985+
if ll < 1 {
986+
return newEvalRaw(tt, nil, text.col), nil
987+
}
988+
if ll > end-pos+1 {
989+
ll = end - pos + 1
990+
}
991+
end = pos + ll - 1
992+
}
993+
res := charset.Slice(cs, text.bytes, int(pos-1), int(end))
994+
return newEvalRaw(tt, res, text.col), nil
995+
}
996+
997+
func (call *builtinSubstring) compile(c *compiler) (ctype, error) {
998+
str, err := call.Arguments[0].compile(c)
999+
if err != nil {
1000+
return ctype{}, err
1001+
}
1002+
1003+
p, err := call.Arguments[1].compile(c)
1004+
if err != nil {
1005+
return ctype{}, err
1006+
}
1007+
1008+
tt := str.Type
1009+
skip1 := c.compileNullCheck2(str, p)
1010+
1011+
col := typedCoercionCollation(sqltypes.VarChar, c.collation)
1012+
switch {
1013+
case str.isTextual():
1014+
col = str.Col
1015+
default:
1016+
tt = sqltypes.VarChar
1017+
c.asm.Convert_xc(2, tt, col.Collation, 0, false)
1018+
}
1019+
_ = c.compileToInt64(p, 1)
1020+
1021+
cs := colldata.Lookup(str.Col.Collation).Charset()
1022+
var skip2 *jump
1023+
if len(call.Arguments) > 2 {
1024+
l, err := call.Arguments[2].compile(c)
1025+
if err != nil {
1026+
return ctype{}, err
1027+
}
1028+
skip2 = c.compileNullCheck2(str, l)
1029+
_ = c.compileToInt64(l, 1)
1030+
c.asm.Fn_SUBSTRING3(tt, cs, col)
1031+
} else {
1032+
c.asm.Fn_SUBSTRING2(tt, cs, col)
1033+
}
1034+
1035+
c.asm.jumpDestination(skip1, skip2)
1036+
return ctype{Type: tt, Col: col, Flag: flagNullable}, nil
1037+
}
1038+
9351039
type builtinConcat struct {
9361040
CallExpr
9371041
collate collations.ID

go/vt/vtgate/evalengine/testcases/cases.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ var Cases = []TestCase{
7878
{Run: FnLTrim},
7979
{Run: FnRTrim},
8080
{Run: FnTrim},
81+
{Run: FnSubstr},
8182
{Run: FnConcat},
8283
{Run: FnConcatWs},
8384
{Run: FnHex},
@@ -1436,6 +1437,37 @@ func FnTrim(yield Query) {
14361437
}
14371438
}
14381439

1440+
func FnSubstr(yield Query) {
1441+
mysqlDocSamples := []string{
1442+
`SUBSTRING('Quadratically',5)`,
1443+
`SUBSTRING('foobarbar' FROM 4)`,
1444+
`SUBSTRING('Quadratically',5,6)`,
1445+
`SUBSTRING('Sakila', -3)`,
1446+
`SUBSTRING('Sakila', -5, 3)`,
1447+
`SUBSTRING('Sakila' FROM -4 FOR 2)`,
1448+
`SUBSTR('Quadratically',5)`,
1449+
`SUBSTR('foobarbar' FROM 4)`,
1450+
`SUBSTR('Quadratically',5,6)`,
1451+
`SUBSTR('Sakila', -3)`,
1452+
`SUBSTR('Sakila', -5, 3)`,
1453+
`SUBSTR('Sakila' FROM -4 FOR 2)`,
1454+
}
1455+
1456+
for _, q := range mysqlDocSamples {
1457+
yield(q, nil)
1458+
}
1459+
1460+
for _, str := range inputStrings {
1461+
for _, i := range radianInputs {
1462+
yield(fmt.Sprintf("SUBSTRING(%s, %s)", str, i), nil)
1463+
1464+
for _, j := range radianInputs {
1465+
yield(fmt.Sprintf("SUBSTRING(%s, %s, %s)", str, i, j), nil)
1466+
}
1467+
}
1468+
}
1469+
}
1470+
14391471
func FnConcat(yield Query) {
14401472
for _, str := range inputStrings {
14411473
yield(fmt.Sprintf("CONCAT(%s)", str), nil)

go/vt/vtgate/evalengine/translate_builtin.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,31 @@ func (ast *astCompiler) translateCallable(call sqlparser.Callable) (IR, error) {
747747
trim: call.Type,
748748
}, nil
749749

750+
case *sqlparser.SubstrExpr:
751+
var args []IR
752+
str, err := ast.translateExpr(call.Name)
753+
if err != nil {
754+
return nil, err
755+
}
756+
args = append(args, str)
757+
pos, err := ast.translateExpr(call.From)
758+
if err != nil {
759+
return nil, err
760+
}
761+
args = append(args, pos)
762+
763+
if call.To != nil {
764+
to, err := ast.translateExpr(call.To)
765+
if err != nil {
766+
return nil, err
767+
}
768+
args = append(args, to)
769+
}
770+
var cexpr = CallExpr{Arguments: args, Method: "SUBSTRING"}
771+
return &builtinSubstring{
772+
CallExpr: cexpr,
773+
collate: ast.cfg.Collation,
774+
}, nil
750775
case *sqlparser.IntervalDateExpr:
751776
var err error
752777
args := make([]IR, 2)

0 commit comments

Comments
 (0)