Skip to content

Commit 9422e32

Browse files
Tool to determine mapping from vindex and value to shard (#17290)
Signed-off-by: Rohit Nayak <rohit@planetscale.com>
1 parent aad2e46 commit 9422e32

File tree

4 files changed

+366
-0
lines changed

4 files changed

+366
-0
lines changed

tools/map-shard-for-value/Makefile

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright 2024 The Vitess Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
build:
16+
go build map-shard-for-value.go
17+
18+
test:
19+
echo "1\n-1\n99" | go run map-shard-for-value.go --total_shards=4 --vindex=xxhash
20+
21+
clean:
22+
rm -f map-shard-for-value
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
/*
2+
Copyright 2024 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"bufio"
21+
"context"
22+
"encoding/hex"
23+
"fmt"
24+
"log"
25+
"os"
26+
"strconv"
27+
"strings"
28+
29+
flag "github.com/spf13/pflag"
30+
31+
"vitess.io/vitess/go/vt/topo"
32+
33+
"vitess.io/vitess/go/sqltypes"
34+
"vitess.io/vitess/go/vt/key"
35+
"vitess.io/vitess/go/vt/proto/topodata"
36+
"vitess.io/vitess/go/vt/vtgate/vindexes"
37+
)
38+
39+
/*
40+
* This tool reads a list of values from stdin and prints the
41+
* corresponding keyspace ID and shard for each value. It uses the given vindex
42+
* and shard ranges to determine the shard. The vindex is expected to be a
43+
* single-column vindex. The shard ranges are specified as a comma-separated
44+
* list of key ranges, example "-80,80-".
45+
* If you have uniformly distributed shards, you can specify the total number
46+
* of shards using the -total_shards flag, and the tool will generate the shard ranges
47+
* using the same logic as the Vitess operator does (using the key.GenerateShardRanges() function).
48+
*
49+
* Example usage:
50+
* echo "1\n2\n3" | go run shard-from-id.go -vindex=hash -shards=-80,80-
51+
*
52+
* Currently tested only for integer values and hash/xxhash vindexes.
53+
*/
54+
55+
func mapShard(allShards []*topodata.ShardReference, ksid key.DestinationKeyspaceID) (string, error) {
56+
foundShard := ""
57+
addShard := func(shard string) error {
58+
foundShard = shard
59+
return nil
60+
}
61+
if err := ksid.Resolve(allShards, addShard); err != nil {
62+
return "", fmt.Errorf("failed to resolve keyspace ID: %v:: %s", ksid.String(), err)
63+
}
64+
65+
if foundShard == "" {
66+
return "", fmt.Errorf("no shard found for keyspace ID: %v", ksid)
67+
}
68+
return foundShard, nil
69+
}
70+
71+
func selectShard(vindex vindexes.Vindex, value sqltypes.Value, allShards []*topodata.ShardReference) (string, key.DestinationKeyspaceID, error) {
72+
ctx := context.Background()
73+
74+
destinations, err := vindexes.Map(ctx, vindex, nil, [][]sqltypes.Value{{value}})
75+
if err != nil {
76+
return "", nil, fmt.Errorf("failed to map value to keyspace ID: %w", err)
77+
}
78+
79+
if len(destinations) != 1 {
80+
return "", nil, fmt.Errorf("unexpected number of destinations: %d", len(destinations))
81+
}
82+
83+
ksid, ok := destinations[0].(key.DestinationKeyspaceID)
84+
if !ok {
85+
return "", nil, fmt.Errorf("unexpected destination type: %T", destinations[0])
86+
}
87+
88+
foundShard, err := mapShard(allShards, ksid)
89+
if err != nil {
90+
return "", nil, fmt.Errorf("failed to map shard, original value %v, keyspace id %s: %w", value, ksid, err)
91+
}
92+
return foundShard, ksid, nil
93+
}
94+
95+
func getValue(valueStr, valueType string) (sqltypes.Value, error) {
96+
var value sqltypes.Value
97+
98+
switch valueType {
99+
case "int":
100+
valueInt, err := strconv.ParseInt(valueStr, 10, 64)
101+
if err != nil {
102+
return value, fmt.Errorf("failed to parse int value: %w", err)
103+
}
104+
value = sqltypes.NewInt64(valueInt)
105+
case "uint":
106+
valueUint, err := strconv.ParseUint(valueStr, 10, 64)
107+
if err != nil {
108+
return value, fmt.Errorf("failed to parse uint value: %w", err)
109+
}
110+
value = sqltypes.NewUint64(valueUint)
111+
case "string":
112+
value = sqltypes.NewVarChar(valueStr)
113+
default:
114+
return value, fmt.Errorf("unsupported value type: %s", valueType)
115+
}
116+
117+
return value, nil
118+
}
119+
120+
func getShardMap(shardsCSV *string) []*topodata.ShardReference {
121+
var allShards []*topodata.ShardReference
122+
123+
for _, shard := range strings.Split(*shardsCSV, ",") {
124+
_, keyRange, err := topo.ValidateShardName(shard)
125+
if err != nil {
126+
log.Fatalf("invalid shard range: %s", shard)
127+
}
128+
allShards = append(allShards, &topodata.ShardReference{
129+
Name: shard,
130+
KeyRange: keyRange,
131+
})
132+
}
133+
return allShards
134+
}
135+
136+
type output struct {
137+
Value string
138+
KeyspaceID string
139+
Shard string
140+
}
141+
142+
func processValues(scanner *bufio.Scanner, shardsCSV *string, vindexName string, valueType string) ([]output, error) {
143+
allShards := getShardMap(shardsCSV)
144+
145+
vindex, err := vindexes.CreateVindex(vindexName, vindexName, nil)
146+
if err != nil {
147+
return nil, fmt.Errorf("failed to create vindex: %v", err)
148+
}
149+
var outputs []output
150+
for scanner.Scan() {
151+
valueStr := scanner.Text()
152+
if valueStr == "" {
153+
continue
154+
}
155+
value, err := getValue(valueStr, valueType)
156+
if err != nil {
157+
return nil, fmt.Errorf("failed to get value for: %v, value_type %s:: %v", valueStr, valueType, err)
158+
}
159+
shard, ksid, err := selectShard(vindex, value, allShards)
160+
if err != nil {
161+
// ignore errors so that we can go ahead with the computation for other values
162+
continue
163+
}
164+
outputs = append(outputs, output{Value: valueStr, KeyspaceID: hex.EncodeToString(ksid), Shard: shard})
165+
}
166+
return outputs, nil
167+
}
168+
169+
func printOutput(outputs []output) {
170+
fmt.Println("value,keyspaceID,shard")
171+
for _, output := range outputs {
172+
fmt.Printf("%s,%s,%s\n", output.Value, output.KeyspaceID, output.Shard)
173+
}
174+
}
175+
176+
func main() {
177+
// Explicitly configuring the logger since it was flaky in displaying logs locally without this.
178+
log.SetOutput(os.Stderr)
179+
log.SetFlags(log.LstdFlags)
180+
log.SetPrefix("LOG: ")
181+
182+
vindexName := flag.String("vindex", "xxhash", "name of the vindex")
183+
shardsCSV := flag.String("shards", "", "comma-separated list of shard ranges")
184+
totalShards := flag.Int("total_shards", 0, "total number of uniformly distributed shards")
185+
valueType := flag.String("value_type", "int", "type of the value (int, uint, or string)")
186+
flag.Parse()
187+
188+
if *totalShards > 0 {
189+
if *shardsCSV != "" {
190+
log.Fatalf("cannot specify both total_shards and shards")
191+
}
192+
shardArr, err := key.GenerateShardRanges(*totalShards)
193+
if err != nil {
194+
log.Fatalf("failed to generate shard ranges: %v", err)
195+
}
196+
*shardsCSV = strings.Join(shardArr, ",")
197+
}
198+
if *shardsCSV == "" {
199+
log.Fatal("shards or total_shards must be specified")
200+
}
201+
scanner := bufio.NewScanner(os.Stdin)
202+
outputs, err := processValues(scanner, shardsCSV, *vindexName, *valueType)
203+
if err != nil {
204+
log.Fatalf("failed to process values: %v", err)
205+
}
206+
printOutput(outputs)
207+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
## Map Shard for Value Tool
2+
3+
### Overview
4+
5+
The `map-shard-for-value` tool maps a given value to a specific shard. This tool helps in determining
6+
which shard a particular value belongs to, based on the vindex algorithm and shard ranges.
7+
8+
### Features
9+
-
10+
11+
- Allows specifying the vindex type (e.g., `hash`, `xxhash`).
12+
- Allows specifying the shard list of (for uniformly distributed shard ranges) the total number of shards to generate.
13+
- Designed as a _filter_: Reads input values from `stdin` and outputs the corresponding shard information, so it can be
14+
used to map values from a file or another program.
15+
16+
### Usage
17+
18+
```sh
19+
make build
20+
```
21+
22+
```sh
23+
echo "1\n-1\n99" | ./map-shard-for-value --total_shards=4 --vindex=xxhash
24+
value,keyspaceID,shard
25+
1,d46405367612b4b7,c0-
26+
-1,d8e2a6a7c8c7623d,c0-
27+
99,200533312244abca,-40
28+
29+
echo "1\n-1\n99" | ./map-shard-for-value --vindex=hash --shards="-80,80-"
30+
value,keyspaceID,shard
31+
1,166b40b44aba4bd6,-80
32+
-1,355550b2150e2451,-80
33+
99,2c40ad56f4593c47,-80
34+
```
35+
36+
#### Flags
37+
38+
- `--vindex`: Specifies the name of the vindex to use (e.g., `hash`, `xxhash`) (default `xxhash`)
39+
40+
One (and only one) of these is required:
41+
42+
- `--shards`: Comma-separated list of shard ranges
43+
- `--total_shards`: Total number of shards, only if shards are uniformly distributed
44+
45+
Optional:
46+
- `--value_type`: Type of the value to map, one of int, uint, string (default `int`)
47+
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
Copyright 2024 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"bufio"
21+
"fmt"
22+
"strings"
23+
"testing"
24+
25+
"github.com/stretchr/testify/require"
26+
)
27+
28+
func TestProcess(t *testing.T) {
29+
type testCase struct {
30+
name string
31+
shardsCSV string
32+
vindexType string
33+
values []int
34+
valueType string
35+
expected []output
36+
}
37+
testCases := []testCase{
38+
{
39+
name: "hash,2 shards",
40+
shardsCSV: "-80,80-",
41+
vindexType: "hash",
42+
values: []int{1, 99},
43+
valueType: "int",
44+
expected: []output{
45+
{
46+
Value: "1",
47+
KeyspaceID: "166b40b44aba4bd6",
48+
Shard: "-80",
49+
},
50+
{
51+
Value: "99",
52+
KeyspaceID: "2c40ad56f4593c47",
53+
Shard: "-80",
54+
},
55+
},
56+
},
57+
{
58+
name: "xxhash,4 shards",
59+
shardsCSV: "-40,40-80,80-c0,c0-",
60+
vindexType: "xxhash",
61+
values: []int{1, 99},
62+
valueType: "int",
63+
expected: []output{
64+
{
65+
Value: "1",
66+
KeyspaceID: "d46405367612b4b7",
67+
Shard: "c0-",
68+
},
69+
{
70+
Value: "99",
71+
KeyspaceID: "200533312244abca",
72+
Shard: "-40",
73+
},
74+
},
75+
},
76+
}
77+
for _, tc := range testCases {
78+
t.Run(tc.name, func(t *testing.T) {
79+
var input strings.Builder
80+
for _, num := range tc.values {
81+
fmt.Fprintf(&input, "%d\n", num)
82+
}
83+
reader := strings.NewReader(input.String())
84+
scanner := bufio.NewScanner(reader)
85+
got, err := processValues(scanner, &tc.shardsCSV, tc.vindexType, tc.valueType)
86+
require.NoError(t, err)
87+
require.EqualValues(t, tc.expected, got)
88+
})
89+
}
90+
}

0 commit comments

Comments
 (0)