From 3c3ea2c205cf2b86ebc214aac54e1d5801c06abc Mon Sep 17 00:00:00 2001 From: Annie Pompa Date: Tue, 2 Dec 2025 13:32:11 -0500 Subject: [PATCH 1/2] cockroachkvs: use bytes.Compare when comparing key version This patch uses bytes.Compare to compare key versions instead of ComparePointSuffixes; the latter calls on `normalizeEngineSuffixForCompare` - which could end up strip additional bytes from the `suffix` (comparing only a prefix of the suffix) if the last byte of the version happens to match one of the sentinel lengths defined. Fixes: #158463 --- cockroachkvs/cockroachkvs.go | 67 ++++++++++++++++----- cockroachkvs/testdata/key_schema_key_seeker | 59 ++++++++++++++++++ 2 files changed, 110 insertions(+), 16 deletions(-) diff --git a/cockroachkvs/cockroachkvs.go b/cockroachkvs/cockroachkvs.go index 9f84192d20..ae85adc84c 100644 --- a/cockroachkvs/cockroachkvs.go +++ b/cockroachkvs/cockroachkvs.go @@ -251,7 +251,9 @@ func decodeMVCCTimestamp(encodedTS []byte) (wallTime uint64, logical uint32, err return wallTime, logical, nil } -// DecodeEngineKey decodes the given bytes as an EngineKey. +// DecodeEngineKey decodes the given bytes as an EngineKey, returning the roach +// key (excluding the sentinel byte) and version, excluding the length byte at +// the end of the engine key. func DecodeEngineKey(b []byte) (roachKey, version []byte, ok bool) { if len(b) == 0 { return nil, nil, false @@ -369,6 +371,22 @@ func ComparePointSuffixes(a, b []byte) int { return bytes.Compare(normalizeEngineSuffixForCompare(b), normalizeEngineSuffixForCompare(a)) } +// compareVersions compares a version with a first row's untyped version +// to determine if the version is a lower bound. Empty suffixes sort before +// non-empty suffixes. Returns true if version >= firstRowUntypedVer. +func compareVersions(version, firstRowUntypedVer []byte) bool { + switch { + case len(version) == 0: + // This includes the case where both versions are empty. + return true + case len(firstRowUntypedVer) == 0: + return false + default: + normalizedVersion := normalizeVersionForCompare(version) + return bytes.Compare(normalizedVersion, firstRowUntypedVer) >= 0 + } +} + // Equal implements base.Equal for Cockroach keys. func Equal(a, b []byte) bool { if len(a) == 0 || len(b) == 0 { @@ -407,20 +425,14 @@ func Equal(a, b []byte) bool { var zeroLogical [4]byte -// normalizeEngineSuffixForCompare takes a non-empty key suffix (including the -// trailing sentinel byte) and returns a prefix of the buffer that should be -// used for byte-wise comparison. It trims the trailing suffix length byte and -// any other trailing bytes that need to be ignored (like a synthetic bit or -// zero logical component). +// normalizeVersionForCompare takes a version (without the trailing length byte) +// and returns a prefix of the buffer that should be used for byte-wise +// comparison. It trims any trailing bytes that need to be ignored (like a +// synthetic bit or zero logical component). Noop if version is a non-MVCC +// version. // //gcassert:inline -func normalizeEngineSuffixForCompare(a []byte) []byte { - // Check sentinel byte. - if invariants.Enabled && len(a) != int(a[len(a)-1]) { - panic(errors.AssertionFailedf("malformed suffix: %x (length byte is %d; but suffix is %d bytes)", a, a[len(a)-1], len(a))) - } - // Strip off sentinel byte. - a = a[:len(a)-1] +func normalizeVersionForCompare(a []byte) []byte { switch len(a) { case engineKeyVersionWallLogicalAndSyntheticTimeLen: // Strip the synthetic bit component from the timestamp version. The @@ -454,6 +466,23 @@ func normalizeEngineSuffixForCompare(a []byte) []byte { return a } +// normalizeEngineSuffixForCompare takes a non-empty key suffix (including the +// trailing sentinel byte) and returns a prefix of the buffer that should be +// used for byte-wise comparison. It trims the trailing suffix length byte and +// any other trailing bytes that need to be ignored (like a synthetic bit or +// zero logical component). Noop if suffix is a non-MVCC suffix. +// +//gcassert:inline +func normalizeEngineSuffixForCompare(a []byte) []byte { + // Check sentinel byte. + if invariants.Enabled && len(a) != int(a[len(a)-1]) { + panic(errors.AssertionFailedf("malformed suffix: %x (length byte is %d; but suffix is %d bytes)", a, a[len(a)-1], len(a))) + } + // Strip off sentinel byte. + a = a[:len(a)-1] + return normalizeVersionForCompare(a) +} + func getKeyPartFromEngineKey(engineKey []byte) (key []byte, ok bool) { if len(engineKey) == 0 { return nil, false @@ -780,8 +809,13 @@ func (ks *cockroachKeySeeker) IsLowerBound(k []byte, syntheticSuffix []byte) boo } // If there's a synthetic suffix, we ignore the block's suffix columns and // compare the key's suffix to the synthetic suffix. + // + // NB: DecodeEngineKey excludes the length byte at the end, and roachKey + // excludes the sentinel byte. The suffix below is the same as k[Split(k):], + // which gives us the version plus the length byte. + suffix := k[len(roachKey)+1:] if len(syntheticSuffix) > 0 { - return ComparePointSuffixes(syntheticSuffix, k[len(roachKey)+1:]) >= 0 + return ComparePointSuffixes(syntheticSuffix, suffix) >= 0 } firstRowWall := ks.mvccWallTimes.At(0) firstLogical := ks.mvccLogical.At(0) @@ -789,7 +823,8 @@ func (ks *cockroachKeySeeker) IsLowerBound(k []byte, syntheticSuffix []byte) boo // is either (a) unversioned (and sorts before all other suffixes) or (b) is // a non-MVCC key with an untyped version. if firstRowWall == 0 && firstLogical == 0 { - return ComparePointSuffixes(ks.untypedVersions.At(0), version) >= 0 + firstRowUntypedVer := ks.untypedVersions.At(0) + return compareVersions(version, firstRowUntypedVer) } var wallTime uint64 @@ -825,7 +860,7 @@ func (ks *cockroachKeySeeker) IsLowerBound(k []byte, syntheticSuffix []byte) boo buf[12] = 13 firstRowSuffix = buf[:13] } - return ComparePointSuffixes(firstRowSuffix, version) >= 0 + return bytes.Compare(version, firstRowSuffix[:len(firstRowSuffix)-1]) >= 0 } // NB: The sign comparison is inverted because suffixes are sorted such that diff --git a/cockroachkvs/testdata/key_schema_key_seeker b/cockroachkvs/testdata/key_schema_key_seeker index 5eff3c6494..c39f54ee36 100644 --- a/cockroachkvs/testdata/key_schema_key_seeker +++ b/cockroachkvs/testdata/key_schema_key_seeker @@ -241,3 +241,62 @@ IsLowerBound(moo @ 3000000000,1, "") = true IsLowerBound(moo @ 3000000000,0, "") = true IsLowerBound(moo @ 0000000000,1, "") = true IsLowerBound(moo @ 02073a83c45688420eaf97824255790f1e12, "") = true + +# Regression test for cockroach/#158463. This test uses a lock table key (a +# non-MVCC key with an untyped version) as the first key in the block. +# Previously, we would incorrectly use the key version as a full suffix and +# call ComparePointSuffixes with it. + +define-block +moo @ 02073a83c45688420eaf97824255790f1e +moo @ 3000000002,1 +moo @ 3000000001,1 +moo @ 0000000001,0 +---- +Parse(moo @ 02073a83c45688420eaf97824255790f1e) = hex:6d6f6f0002073a83c45688420eaf97824255790f1e12 +Parse(moo @ 3000000002,1) = hex:6d6f6f0000000000b2d05e02000000010d +Parse(moo @ 3000000001,1) = hex:6d6f6f0000000000b2d05e01000000010d +Parse(moo @ 0000000001,0) = hex:6d6f6f00000000000000000109 + +is-lower-bound +moo @ 03073a83c45688420eaf97824255790f1e +moo @ 9000000000,2 +moo @ 8000000000,2 +moo @ 8000000000,1 +moo @ 8000000000,0 +moo @ 7000000000,9 +moo @ 3000000000,2 +moo @ 3000000000,1 +moo @ 3000000000,0 +moo @ 0000000000,1 +---- +IsLowerBound(moo @ 03073a83c45688420eaf97824255790f1e, "") = true +IsLowerBound(moo @ 9000000000,2, "") = false +IsLowerBound(moo @ 8000000000,2, "") = false +IsLowerBound(moo @ 8000000000,1, "") = false +IsLowerBound(moo @ 8000000000,0, "") = false +IsLowerBound(moo @ 7000000000,9, "") = false +IsLowerBound(moo @ 3000000000,2, "") = false +IsLowerBound(moo @ 3000000000,1, "") = false +IsLowerBound(moo @ 3000000000,0, "") = false +IsLowerBound(moo @ 0000000000,1, "") = false + +# Test case for key with non-standard version length. + +define-block +moo @ 3000000000,1 +moo @ 3000000002,1 +moo @ 3000000001,1 +---- +Parse(moo @ 3000000000,1) = hex:6d6f6f0000000000b2d05e00000000010d +Parse(moo @ 3000000002,1) = hex:6d6f6f0000000000b2d05e02000000010d +Parse(moo @ 3000000001,1) = hex:6d6f6f0000000000b2d05e01000000010d + +is-lower-bound +moo @ 0102030405 +moo @ ffffffffffffffffffffffffffffffff +moo @ 00 +---- +IsLowerBound(moo @ 0102030405, "") = true +IsLowerBound(moo @ ffffffffffffffffffffffffffffffff, "") = true +IsLowerBound(moo @ 00, "") = false From b545302d11f5d09645b787ac9ae266d8fe1bf4e4 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Wed, 3 Dec 2025 17:52:40 -0800 Subject: [PATCH 2/2] cockroachkvs: add randomized test for IsLowerBound --- cockroachkvs/cockroachkvs_test.go | 80 ++++++++++++++++++++++++++++--- internal/base/comparer.go | 8 ++-- 2 files changed, 78 insertions(+), 10 deletions(-) diff --git a/cockroachkvs/cockroachkvs_test.go b/cockroachkvs/cockroachkvs_test.go index 3f7e9c8543..582d6c3595 100644 --- a/cockroachkvs/cockroachkvs_test.go +++ b/cockroachkvs/cockroachkvs_test.go @@ -30,17 +30,34 @@ import ( testify "github.com/stretchr/testify/require" ) -func TestComparer(t *testing.T) { +func testPrefixes() [][]byte { prefixes := [][]byte{ EncodeMVCCKey(nil, []byte("abc"), 0, 0), EncodeMVCCKey(nil, []byte("d"), 0, 0), EncodeMVCCKey(nil, []byte("ef"), 0, 0), } + for range 10 { + roachKey := make([]byte, 1+rand.IntN(100)) + for i := range roachKey { + roachKey[i] = byte(rand.Uint32()) + } + prefixes = append(prefixes, EncodeMVCCKey(nil, roachKey, 0, 0)) + } + return prefixes +} +func testSuffixes(t *testing.T) [][]byte { suffixes := [][]byte{{}} - for walltime := 3; walltime > 0; walltime-- { - for logical := 2; logical >= 0; logical-- { - key := EncodeMVCCKey(nil, []byte("foo"), uint64(walltime), uint32(logical)) + + wallTimes := []uint64{1, 2, 3} + logicalTimes := []uint32{0, 1, 2} + for range 5 { + wallTimes = append(wallTimes, rand.Uint64()) + logicalTimes = append(logicalTimes, rand.Uint32()) + } + for _, walltime := range wallTimes { + for _, logical := range logicalTimes { + key := EncodeMVCCKey(nil, []byte("foo"), walltime, logical) suffix := key[Comparer.Split(key):] suffixes = append(suffixes, suffix) @@ -51,7 +68,7 @@ func TestComparer(t *testing.T) { if Comparer.CompareRangeSuffixes(suffix, newSuffix) != 1 { t.Fatalf("expected suffixes %x < %x", suffix, newSuffix) } - if Comparer.Compare(slices.Concat(prefixes[0], suffix), slices.Concat(prefixes[0], newSuffix)) != 0 { + if Comparer.Compare(slices.Concat([]byte("foo"), suffix), slices.Concat([]byte("foo"), newSuffix)) != 0 { t.Fatalf("expected keys with suffixes %x and %x to be equal", suffix, newSuffix) } suffixes = append(suffixes, newSuffix) @@ -66,15 +83,31 @@ func TestComparer(t *testing.T) { if Comparer.CompareRangeSuffixes(suffix, newSuffix) != 1 { t.Fatalf("expected suffixes %x < %x", suffix, newSuffix) } - if Comparer.Compare(slices.Concat(prefixes[0], suffix), slices.Concat(prefixes[0], newSuffix)) != 0 { + if Comparer.Compare(slices.Concat([]byte("foo"), suffix), slices.Concat([]byte("foo"), newSuffix)) != 0 { t.Fatalf("expected keys with suffixes %x and %x to be equal", suffix, newSuffix) } suffixes = append(suffixes, newSuffix) } } // Add some lock table suffixes. + suffixes = append(suffixes, append(bytes.Repeat([]byte{0}, engineKeyVersionLockTableLen), suffixLenWithLockTable)) suffixes = append(suffixes, append(bytes.Repeat([]byte{1}, engineKeyVersionLockTableLen), suffixLenWithLockTable)) suffixes = append(suffixes, append(bytes.Repeat([]byte{2}, engineKeyVersionLockTableLen), suffixLenWithLockTable)) + suffixes = append(suffixes, append(bytes.Repeat([]byte{0xFF}, engineKeyVersionLockTableLen), suffixLenWithLockTable)) + for range 5 { + v := make([]byte, engineKeyVersionLockTableLen+1) + for i := 0; i < engineKeyVersionLockTableLen; i++ { + v[i] = byte(rand.Uint32()) + } + v[engineKeyVersionLockTableLen] = suffixLenWithLockTable + suffixes = append(suffixes, v) + } + return suffixes +} + +func TestComparer(t *testing.T) { + prefixes := testPrefixes() + suffixes := testSuffixes(t) if err := base.CheckComparer(&Comparer, prefixes, suffixes); err != nil { t.Error(err) } @@ -294,6 +327,41 @@ func TestKeySchema_KeySeeker(t *testing.T) { }) } +func TestKeySeekerIsLowerBound(t *testing.T) { + var keys [][]byte + for _, prefix := range testPrefixes() { + for _, suffix := range testSuffixes(t) { + keys = append(keys, slices.Concat(prefix, suffix)) + } + } + var enc colblk.DataBlockEncoder + enc.Init(&KeySchema) + for _, k := range keys { + enc.Reset() + kcmp := enc.KeyWriter.ComparePrev(k) + ikey := base.InternalKey{ + UserKey: k, + Trailer: pebble.MakeInternalKeyTrailer(0, base.InternalKeyKindSet), + } + enc.Add(ikey, k, block.InPlaceValuePrefix(false), kcmp, false /* isObsolete */) + blk, _ := enc.Finish(1, enc.Size()) + var dec colblk.DataBlockDecoder + bd := dec.Init(&KeySchema, blk) + ksPointer := &cockroachKeySeeker{} + KeySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec, bd) + ks := KeySchema.KeySeeker((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer))) + + for _, k2 := range keys { + isLowerBound := ks.IsLowerBound(k2, nil /* syntheticSuffix */) + expected := Comparer.Compare(k, k2) >= 0 + if isLowerBound != expected { + t.Errorf("key: %s IsLowerBound(%s) = %t; expected %t", + formatUserKey(k), formatUserKey(k2), isLowerBound, expected) + } + } + } +} + func getSyntheticSuffix(t *testing.T, td *datadriven.TestData) ([]byte, string, bool) { var syntheticSuffix []byte var syntheticSuffixStr string diff --git a/internal/base/comparer.go b/internal/base/comparer.go index 901fec4681..df5235c8a2 100644 --- a/internal/base/comparer.go +++ b/internal/base/comparer.go @@ -8,6 +8,7 @@ import ( "bytes" "encoding/binary" "fmt" + "math/rand/v2" "slices" "strconv" "unicode/utf8" @@ -509,11 +510,10 @@ func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error { return errors.Errorf("ComparePointSuffixes is inconsistent") } - n := len(prefixes) // Removing leading bytes from prefixes must yield valid prefixes. - for i := 0; i < n; i++ { - for j := 1; j < len(prefixes[i]); j++ { - prefixes = append(prefixes, prefixes[i][j:]) + for _, p := range prefixes { + if len(p) > 1 { + prefixes = append(prefixes, p[1+rand.IntN(len(p)-1):]) } }