-
Notifications
You must be signed in to change notification settings - Fork 6.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Steps toward deprecating implicit prefix seek, related fixes #13026
Changes from 1 commit
cf2f5dd
7441351
ae5cefe
1204f5d
597a9fc
38360b8
62bf4e2
19a6a58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -171,7 +171,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { | |
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); | ||
CreateAndReopenWithCF({"pikachu"}, options); | ||
|
||
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); | ||
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "a", &value)); | ||
|
||
ASSERT_OK(Put(1, "a", "b")); | ||
bool value_found = false; | ||
|
@@ -187,7 +187,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { | |
uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); | ||
ASSERT_TRUE( | ||
db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found)); | ||
ASSERT_TRUE(!value_found); | ||
ASSERT_FALSE(value_found); | ||
// assert that no new files were opened and no new blocks were | ||
// read into block cache. | ||
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); | ||
|
@@ -197,7 +197,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { | |
|
||
numopen = TestGetTickerCount(options, NO_FILE_OPENS); | ||
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); | ||
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); | ||
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "a", &value)); | ||
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); | ||
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); | ||
|
||
|
@@ -207,15 +207,15 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) { | |
|
||
numopen = TestGetTickerCount(options, NO_FILE_OPENS); | ||
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); | ||
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value)); | ||
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "a", &value)); | ||
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); | ||
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); | ||
|
||
ASSERT_OK(Delete(1, "c")); | ||
|
||
numopen = TestGetTickerCount(options, NO_FILE_OPENS); | ||
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD); | ||
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value)); | ||
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "c", &value)); | ||
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); | ||
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); | ||
|
||
|
@@ -2177,24 +2177,130 @@ TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilterMultiGet) { | |
|
||
db_->ReleaseSnapshot(snapshot); | ||
} | ||
namespace { | ||
std::pair<uint64_t, uint64_t> GetBloomStat(const Options& options, bool sst) { | ||
if (sst) { | ||
return {options.statistics->getAndResetTickerCount( | ||
NON_LAST_LEVEL_SEEK_FILTER_MATCH), | ||
options.statistics->getAndResetTickerCount( | ||
NON_LAST_LEVEL_SEEK_FILTERED)}; | ||
} else { | ||
auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0); | ||
auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0); | ||
return {hit, miss}; | ||
} | ||
} | ||
|
||
TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) { | ||
constexpr size_t kPrefixSize = 8; | ||
const std::string kKey = "key"; | ||
assert(kKey.size() < kPrefixSize); | ||
std::pair<uint64_t, uint64_t> HitAndMiss(uint64_t hits, uint64_t misses) { | ||
return {hits, misses}; | ||
} | ||
} // namespace | ||
|
||
TEST_F(DBBloomFilterTest, MemtablePrefixBloom) { | ||
Options options = CurrentOptions(); | ||
options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixSize)); | ||
options.prefix_extractor.reset(NewFixedPrefixTransform(4)); | ||
options.memtable_prefix_bloom_size_ratio = 0.25; | ||
Reopen(options); | ||
ASSERT_OK(Put(kKey, "v")); | ||
ASSERT_EQ("v", Get(kKey)); | ||
std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions())); | ||
iter->Seek(kKey); | ||
ASSERT_FALSE(options.prefix_extractor->InDomain("key")); | ||
ASSERT_OK(Put("key", "v")); | ||
ASSERT_OK(Put("goat1", "g1")); | ||
ASSERT_OK(Put("goat2", "g2")); | ||
|
||
GetBloomStat(options, false); // Reset | ||
|
||
// Out of domain | ||
ASSERT_EQ("v", Get("key")); | ||
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false)); | ||
|
||
// In domain | ||
ASSERT_EQ("g1", Get("goat1")); | ||
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false)); | ||
ASSERT_EQ("NOT_FOUND", Get("goat9")); | ||
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false)); | ||
ASSERT_EQ("NOT_FOUND", Get("goan1")); | ||
ASSERT_EQ(HitAndMiss(0, 1), GetBloomStat(options, false)); | ||
|
||
ReadOptions ropts; | ||
if (options.prefix_seek_opt_in_only) { | ||
ropts.prefix_same_as_start = true; | ||
} | ||
std::unique_ptr<Iterator> iter(db_->NewIterator(ropts)); | ||
// Out of domain | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Q: just to clarify, for out-of-domain queries, the expectation is that the prefix extractor (and any prefix blooms) will be ignored, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct |
||
iter->Seek("ke"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ("key", iter->key()); | ||
iter->SeekForPrev("kez"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ("key", iter->key()); | ||
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false)); | ||
|
||
// In domain | ||
iter->Seek("goan"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_FALSE(iter->Valid()); | ||
ASSERT_EQ(HitAndMiss(0, 1), GetBloomStat(options, false)); | ||
iter->Seek("goat"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ("goat1", iter->key()); | ||
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false)); | ||
|
||
// Changing prefix extractor should affect prefix query semantics | ||
// and bypass the existing memtable Bloom filter | ||
ASSERT_OK(db_->SetOptions({{"prefix_extractor", "fixed:5"}})); | ||
iter.reset(db_->NewIterator(ropts)); | ||
// Now out of domain | ||
iter->Seek("goan"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ("goat1", iter->key()); | ||
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false)); | ||
// In domain | ||
iter->Seek("goat2"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ("goat2", iter->key()); | ||
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false)); | ||
// In domain | ||
if (ropts.prefix_same_as_start) { | ||
iter->Seek("goat0"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_FALSE(iter->Valid()); | ||
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false)); | ||
} else { | ||
// NOTE: legacy prefix Seek may return keys outside of prefix | ||
} | ||
|
||
// Start a fresh new memtable, using new prefix extractor | ||
ASSERT_OK(SingleDelete("key")); | ||
ASSERT_OK(SingleDelete("goat1")); | ||
ASSERT_OK(SingleDelete("goat2")); | ||
ASSERT_OK(Flush()); | ||
|
||
ASSERT_OK(Put("goat1", "g1")); | ||
ASSERT_OK(Put("goat2", "g2")); | ||
|
||
iter.reset(db_->NewIterator(ropts)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe toss in some There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do |
||
|
||
// Now out of domain | ||
iter->Seek("goan"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ(kKey, iter->key()); | ||
iter->SeekForPrev(kKey); | ||
ASSERT_EQ("goat1", iter->key()); | ||
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false)); | ||
// In domain | ||
iter->Seek("goat2"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(iter->Valid()); | ||
ASSERT_EQ(kKey, iter->key()); | ||
ASSERT_EQ("goat2", iter->key()); | ||
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false)); | ||
// In domain | ||
iter->Seek("goat0"); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_FALSE(iter->Valid()); | ||
ASSERT_EQ(HitAndMiss(0, 1), GetBloomStat(options, false)); | ||
} | ||
|
||
class DBBloomFilterTestVaryPrefixAndFormatVer | ||
|
@@ -2507,7 +2613,11 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { | |
ASSERT_OK(Put(key1, value1, WriteOptions())); | ||
ASSERT_OK(Put(key3, value3, WriteOptions())); | ||
|
||
std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions())); | ||
ReadOptions ropts; | ||
if (options_.prefix_seek_opt_in_only) { | ||
ropts.prefix_same_as_start = true; | ||
} | ||
std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ropts)); | ||
|
||
// check memtable bloom stats | ||
iter->Seek(key1); | ||
|
@@ -2526,13 +2636,13 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { | |
|
||
iter->Seek(key2); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(!iter->Valid()); | ||
ASSERT_FALSE(iter->Valid()); | ||
ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); | ||
ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); | ||
|
||
ASSERT_OK(Flush()); | ||
|
||
iter.reset(dbfull()->NewIterator(ReadOptions())); | ||
iter.reset(dbfull()->NewIterator(ropts)); | ||
|
||
// Check SST bloom stats | ||
iter->Seek(key1); | ||
|
@@ -2550,7 +2660,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { | |
|
||
iter->Seek(key2); | ||
ASSERT_OK(iter->status()); | ||
ASSERT_TRUE(!iter->Valid()); | ||
ASSERT_FALSE(iter->Valid()); | ||
ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count); | ||
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count); | ||
} | ||
|
@@ -2659,9 +2769,14 @@ TEST_F(DBBloomFilterTest, PrefixScan) { | |
PrefixScanInit(this); | ||
count = 0; | ||
env_->random_read_counter_.Reset(); | ||
iter = db_->NewIterator(ReadOptions()); | ||
ReadOptions ropts; | ||
if (options.prefix_seek_opt_in_only) { | ||
ropts.prefix_same_as_start = true; | ||
} | ||
iter = db_->NewIterator(ropts); | ||
for (iter->Seek(prefix); iter->Valid(); iter->Next()) { | ||
if (!iter->key().starts_with(prefix)) { | ||
ASSERT_FALSE(ropts.prefix_same_as_start); | ||
break; | ||
} | ||
count++; | ||
|
@@ -3397,23 +3512,6 @@ class FixedSuffix4Transform : public SliceTransform { | |
|
||
bool InDomain(const Slice& src) const override { return src.size() >= 4; } | ||
}; | ||
|
||
std::pair<uint64_t, uint64_t> GetBloomStat(const Options& options, bool sst) { | ||
if (sst) { | ||
return {options.statistics->getAndResetTickerCount( | ||
NON_LAST_LEVEL_SEEK_FILTER_MATCH), | ||
options.statistics->getAndResetTickerCount( | ||
NON_LAST_LEVEL_SEEK_FILTERED)}; | ||
} else { | ||
auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0); | ||
auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0); | ||
return {hit, miss}; | ||
} | ||
} | ||
|
||
std::pair<uint64_t, uint64_t> HitAndMiss(uint64_t hits, uint64_t misses) { | ||
return {hits, misses}; | ||
} | ||
} // anonymous namespace | ||
|
||
// This uses a prefix_extractor + comparator combination that violates | ||
|
@@ -3447,9 +3545,7 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) { | |
ASSERT_OK(Flush()); | ||
} | ||
ReadOptions read_options; | ||
if (flushed) { // TODO: support auto_prefix_mode in memtable? | ||
read_options.auto_prefix_mode = true; | ||
} | ||
read_options.auto_prefix_mode = true; | ||
EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0)); | ||
{ | ||
Slice ub("999aaaa"); | ||
|
@@ -3517,9 +3613,8 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) { | |
ASSERT_OK(Flush()); | ||
} | ||
ReadOptions read_options; | ||
if (flushed) { // TODO: support auto_prefix_mode in memtable? | ||
read_options.auto_prefix_mode = true; | ||
} else { | ||
read_options.auto_prefix_mode = true; | ||
if (!flushed) { | ||
// TODO: why needed? | ||
get_perf_context()->bloom_memtable_hit_count = 0; | ||
get_perf_context()->bloom_memtable_miss_count = 0; | ||
|
@@ -3661,9 +3756,8 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) { | |
ASSERT_OK(Flush()); | ||
} | ||
ReadOptions read_options; | ||
if (flushed) { // TODO: support auto_prefix_mode in memtable? | ||
read_options.auto_prefix_mode = true; | ||
} else { | ||
read_options.auto_prefix_mode = true; | ||
if (!flushed) { | ||
// TODO: why needed? | ||
get_perf_context()->bloom_memtable_hit_count = 0; | ||
get_perf_context()->bloom_memtable_miss_count = 0; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2066,15 +2066,18 @@ InternalIterator* DBImpl::NewInternalIterator( | |
bool allow_unprepared_value, ArenaWrappedDBIter* db_iter) { | ||
InternalIterator* internal_iter; | ||
assert(arena != nullptr); | ||
auto prefix_extractor = | ||
super_version->mutable_cf_options.prefix_extractor.get(); | ||
// Need to create internal iterator from the arena. | ||
MergeIteratorBuilder merge_iter_builder( | ||
&cfd->internal_comparator(), arena, | ||
!read_options.total_order_seek && | ||
super_version->mutable_cf_options.prefix_extractor != nullptr, | ||
!read_options.total_order_seek && // FIXME? | ||
prefix_extractor != nullptr, | ||
read_options.iterate_upper_bound); | ||
// Collect iterator for mutable memtable | ||
auto mem_iter = super_version->mem->NewIterator( | ||
read_options, super_version->GetSeqnoToTimeMapping(), arena); | ||
read_options, super_version->GetSeqnoToTimeMapping(), arena, | ||
super_version->mutable_cf_options.prefix_extractor.get()); | ||
Status s; | ||
if (!read_options.ignore_range_deletions) { | ||
std::unique_ptr<TruncatedRangeDelIterator> mem_tombstone_iter; | ||
|
@@ -2098,6 +2101,7 @@ InternalIterator* DBImpl::NewInternalIterator( | |
if (s.ok()) { | ||
super_version->imm->AddIterators( | ||
read_options, super_version->GetSeqnoToTimeMapping(), | ||
super_version->mutable_cf_options.prefix_extractor.get(), | ||
&merge_iter_builder, !read_options.ignore_range_deletions); | ||
} | ||
TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s); | ||
|
@@ -3805,6 +3809,9 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options, | |
read_options.io_activity = Env::IOActivity::kDBIterator; | ||
} | ||
|
||
read_options.total_order_seek |= | ||
immutable_db_options_.prefix_seek_opt_in_only; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this the best (or only) place to do this? We also have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixing. I'll have an assertion in DBIter::DBIter that |
||
if (read_options.managed) { | ||
return NewErrorIterator( | ||
Status::NotSupported("Managed iterator is not supported anymore.")); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Q: Do we have to pass the prefix extractor here? We don't pass it to the memtable iterator, and
MergeIteratorBuilder
is in non-prefix modeThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixing