Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Steps toward deprecating implicit prefix seek, related fixes #13026

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions db/column_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1201,10 +1201,12 @@ Status ColumnFamilyData::RangesOverlapWithMemtables(
read_opts.total_order_seek = true;
MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
merge_iter_builder.AddIterator(super_version->mem->NewIterator(
read_opts, /*seqno_to_time_mapping=*/nullptr, &arena));
super_version->imm->AddIterators(read_opts, /*seqno_to_time_mapping=*/nullptr,
&merge_iter_builder,
false /* add_range_tombstone_iter */);
read_opts, /*seqno_to_time_mapping=*/nullptr, &arena,
/*prefix_extractor=*/nullptr));
super_version->imm->AddIterators(
read_opts, /*seqno_to_time_mapping=*/nullptr,
super_version->mutable_cf_options.prefix_extractor.get(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: Do we have to pass the prefix extractor here? We don't pass it to the memtable iterator, and MergeIteratorBuilder is in non-prefix mode

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixing

&merge_iter_builder, false /* add_range_tombstone_iter */);
ScopedArenaPtr<InternalIterator> memtable_iter(merge_iter_builder.Finish());

auto read_seq = super_version->current->version_set()->LastSequence();
Expand Down
190 changes: 142 additions & 48 deletions db/db_bloom_filter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
CreateAndReopenWithCF({"pikachu"}, options);

ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "a", &value));

ASSERT_OK(Put(1, "a", "b"));
bool value_found = false;
Expand All @@ -187,7 +187,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {
uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
ASSERT_TRUE(
db_->KeyMayExist(ropts, handles_[1], "a", &value, &value_found));
ASSERT_TRUE(!value_found);
ASSERT_FALSE(value_found);
// assert that no new files were opened and no new blocks were
// read into block cache.
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
Expand All @@ -197,7 +197,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {

numopen = TestGetTickerCount(options, NO_FILE_OPENS);
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "a", &value));
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

Expand All @@ -207,15 +207,15 @@ TEST_P(DBBloomFilterTestDefFormatVersion, KeyMayExist) {

numopen = TestGetTickerCount(options, NO_FILE_OPENS);
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "a", &value));
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "a", &value));
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

ASSERT_OK(Delete(1, "c"));

numopen = TestGetTickerCount(options, NO_FILE_OPENS);
cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
ASSERT_TRUE(!db_->KeyMayExist(ropts, handles_[1], "c", &value));
ASSERT_FALSE(db_->KeyMayExist(ropts, handles_[1], "c", &value));
ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));

Expand Down Expand Up @@ -2177,24 +2177,130 @@ TEST_F(DBBloomFilterTest, MemtableWholeKeyBloomFilterMultiGet) {

db_->ReleaseSnapshot(snapshot);
}
namespace {
std::pair<uint64_t, uint64_t> GetBloomStat(const Options& options, bool sst) {
if (sst) {
return {options.statistics->getAndResetTickerCount(
NON_LAST_LEVEL_SEEK_FILTER_MATCH),
options.statistics->getAndResetTickerCount(
NON_LAST_LEVEL_SEEK_FILTERED)};
} else {
auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0);
auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0);
return {hit, miss};
}
}

TEST_F(DBBloomFilterTest, MemtablePrefixBloomOutOfDomain) {
constexpr size_t kPrefixSize = 8;
const std::string kKey = "key";
assert(kKey.size() < kPrefixSize);
std::pair<uint64_t, uint64_t> HitAndMiss(uint64_t hits, uint64_t misses) {
return {hits, misses};
}
} // namespace

TEST_F(DBBloomFilterTest, MemtablePrefixBloom) {
Options options = CurrentOptions();
options.prefix_extractor.reset(NewFixedPrefixTransform(kPrefixSize));
options.prefix_extractor.reset(NewFixedPrefixTransform(4));
options.memtable_prefix_bloom_size_ratio = 0.25;
Reopen(options);
ASSERT_OK(Put(kKey, "v"));
ASSERT_EQ("v", Get(kKey));
std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
iter->Seek(kKey);
ASSERT_FALSE(options.prefix_extractor->InDomain("key"));
ASSERT_OK(Put("key", "v"));
ASSERT_OK(Put("goat1", "g1"));
ASSERT_OK(Put("goat2", "g2"));

GetBloomStat(options, false); // Reset

// Out of domain
ASSERT_EQ("v", Get("key"));
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false));

// In domain
ASSERT_EQ("g1", Get("goat1"));
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false));
ASSERT_EQ("NOT_FOUND", Get("goat9"));
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false));
ASSERT_EQ("NOT_FOUND", Get("goan1"));
ASSERT_EQ(HitAndMiss(0, 1), GetBloomStat(options, false));

ReadOptions ropts;
if (options.prefix_seek_opt_in_only) {
ropts.prefix_same_as_start = true;
}
std::unique_ptr<Iterator> iter(db_->NewIterator(ropts));
// Out of domain
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: just to clarify, for out-of-domain queries, the expectation is that the prefix extractor (and any prefix blooms) will be ignored, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct

iter->Seek("ke");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("key", iter->key());
iter->SeekForPrev("kez");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("key", iter->key());
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false));

// In domain
iter->Seek("goan");
ASSERT_OK(iter->status());
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(HitAndMiss(0, 1), GetBloomStat(options, false));
iter->Seek("goat");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("goat1", iter->key());
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false));

// Changing prefix extractor should affect prefix query semantics
// and bypass the existing memtable Bloom filter
ASSERT_OK(db_->SetOptions({{"prefix_extractor", "fixed:5"}}));
iter.reset(db_->NewIterator(ropts));
// Now out of domain
iter->Seek("goan");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("goat1", iter->key());
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false));
// In domain
iter->Seek("goat2");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ("goat2", iter->key());
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false));
// In domain
if (ropts.prefix_same_as_start) {
iter->Seek("goat0");
ASSERT_OK(iter->status());
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false));
} else {
// NOTE: legacy prefix Seek may return keys outside of prefix
}

// Start a fresh new memtable, using new prefix extractor
ASSERT_OK(SingleDelete("key"));
ASSERT_OK(SingleDelete("goat1"));
ASSERT_OK(SingleDelete("goat2"));
ASSERT_OK(Flush());

ASSERT_OK(Put("goat1", "g1"));
ASSERT_OK(Put("goat2", "g2"));

iter.reset(db_->NewIterator(ropts));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe toss in some Gets too like above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do


// Now out of domain
iter->Seek("goan");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(kKey, iter->key());
iter->SeekForPrev(kKey);
ASSERT_EQ("goat1", iter->key());
ASSERT_EQ(HitAndMiss(0, 0), GetBloomStat(options, false));
// In domain
iter->Seek("goat2");
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(kKey, iter->key());
ASSERT_EQ("goat2", iter->key());
ASSERT_EQ(HitAndMiss(1, 0), GetBloomStat(options, false));
// In domain
iter->Seek("goat0");
ASSERT_OK(iter->status());
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(HitAndMiss(0, 1), GetBloomStat(options, false));
}

class DBBloomFilterTestVaryPrefixAndFormatVer
Expand Down Expand Up @@ -2507,7 +2613,11 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
ASSERT_OK(Put(key1, value1, WriteOptions()));
ASSERT_OK(Put(key3, value3, WriteOptions()));

std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
ReadOptions ropts;
if (options_.prefix_seek_opt_in_only) {
ropts.prefix_same_as_start = true;
}
std::unique_ptr<Iterator> iter(dbfull()->NewIterator(ropts));

// check memtable bloom stats
iter->Seek(key1);
Expand All @@ -2526,13 +2636,13 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {

iter->Seek(key2);
ASSERT_OK(iter->status());
ASSERT_TRUE(!iter->Valid());
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count);
ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count);

ASSERT_OK(Flush());

iter.reset(dbfull()->NewIterator(ReadOptions()));
iter.reset(dbfull()->NewIterator(ropts));

// Check SST bloom stats
iter->Seek(key1);
Expand All @@ -2550,7 +2660,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {

iter->Seek(key2);
ASSERT_OK(iter->status());
ASSERT_TRUE(!iter->Valid());
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(1, get_perf_context()->bloom_sst_miss_count);
ASSERT_EQ(expected_hits, get_perf_context()->bloom_sst_hit_count);
}
Expand Down Expand Up @@ -2659,9 +2769,14 @@ TEST_F(DBBloomFilterTest, PrefixScan) {
PrefixScanInit(this);
count = 0;
env_->random_read_counter_.Reset();
iter = db_->NewIterator(ReadOptions());
ReadOptions ropts;
if (options.prefix_seek_opt_in_only) {
ropts.prefix_same_as_start = true;
}
iter = db_->NewIterator(ropts);
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
if (!iter->key().starts_with(prefix)) {
ASSERT_FALSE(ropts.prefix_same_as_start);
break;
}
count++;
Expand Down Expand Up @@ -3397,23 +3512,6 @@ class FixedSuffix4Transform : public SliceTransform {

bool InDomain(const Slice& src) const override { return src.size() >= 4; }
};

std::pair<uint64_t, uint64_t> GetBloomStat(const Options& options, bool sst) {
if (sst) {
return {options.statistics->getAndResetTickerCount(
NON_LAST_LEVEL_SEEK_FILTER_MATCH),
options.statistics->getAndResetTickerCount(
NON_LAST_LEVEL_SEEK_FILTERED)};
} else {
auto hit = std::exchange(get_perf_context()->bloom_memtable_hit_count, 0);
auto miss = std::exchange(get_perf_context()->bloom_memtable_miss_count, 0);
return {hit, miss};
}
}

std::pair<uint64_t, uint64_t> HitAndMiss(uint64_t hits, uint64_t misses) {
return {hits, misses};
}
} // anonymous namespace

// This uses a prefix_extractor + comparator combination that violates
Expand Down Expand Up @@ -3447,9 +3545,7 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter1) {
ASSERT_OK(Flush());
}
ReadOptions read_options;
if (flushed) { // TODO: support auto_prefix_mode in memtable?
read_options.auto_prefix_mode = true;
}
read_options.auto_prefix_mode = true;
EXPECT_EQ(GetBloomStat(options, flushed), HitAndMiss(0, 0));
{
Slice ub("999aaaa");
Expand Down Expand Up @@ -3517,9 +3613,8 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter2) {
ASSERT_OK(Flush());
}
ReadOptions read_options;
if (flushed) { // TODO: support auto_prefix_mode in memtable?
read_options.auto_prefix_mode = true;
} else {
read_options.auto_prefix_mode = true;
if (!flushed) {
// TODO: why needed?
get_perf_context()->bloom_memtable_hit_count = 0;
get_perf_context()->bloom_memtable_miss_count = 0;
Expand Down Expand Up @@ -3661,9 +3756,8 @@ TEST_F(DBBloomFilterTest, WeirdPrefixExtractorWithFilter3) {
ASSERT_OK(Flush());
}
ReadOptions read_options;
if (flushed) { // TODO: support auto_prefix_mode in memtable?
read_options.auto_prefix_mode = true;
} else {
read_options.auto_prefix_mode = true;
if (!flushed) {
// TODO: why needed?
get_perf_context()->bloom_memtable_hit_count = 0;
get_perf_context()->bloom_memtable_miss_count = 0;
Expand Down
13 changes: 10 additions & 3 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2066,15 +2066,18 @@ InternalIterator* DBImpl::NewInternalIterator(
bool allow_unprepared_value, ArenaWrappedDBIter* db_iter) {
InternalIterator* internal_iter;
assert(arena != nullptr);
auto prefix_extractor =
super_version->mutable_cf_options.prefix_extractor.get();
// Need to create internal iterator from the arena.
MergeIteratorBuilder merge_iter_builder(
&cfd->internal_comparator(), arena,
!read_options.total_order_seek &&
super_version->mutable_cf_options.prefix_extractor != nullptr,
!read_options.total_order_seek && // FIXME?
prefix_extractor != nullptr,
read_options.iterate_upper_bound);
// Collect iterator for mutable memtable
auto mem_iter = super_version->mem->NewIterator(
read_options, super_version->GetSeqnoToTimeMapping(), arena);
read_options, super_version->GetSeqnoToTimeMapping(), arena,
super_version->mutable_cf_options.prefix_extractor.get());
Status s;
if (!read_options.ignore_range_deletions) {
std::unique_ptr<TruncatedRangeDelIterator> mem_tombstone_iter;
Expand All @@ -2098,6 +2101,7 @@ InternalIterator* DBImpl::NewInternalIterator(
if (s.ok()) {
super_version->imm->AddIterators(
read_options, super_version->GetSeqnoToTimeMapping(),
super_version->mutable_cf_options.prefix_extractor.get(),
&merge_iter_builder, !read_options.ignore_range_deletions);
}
TEST_SYNC_POINT_CALLBACK("DBImpl::NewInternalIterator:StatusCallback", &s);
Expand Down Expand Up @@ -3805,6 +3809,9 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options,
read_options.io_activity = Env::IOActivity::kDBIterator;
}

read_options.total_order_seek |=
immutable_db_options_.prefix_seek_opt_in_only;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the best (or only) place to do this? We also have NewIterators as well as some other features like transactions which call NewIteratorImpl directly

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixing. I'll have an assertion in DBIter::DBIter that prefix_seek_opt_in_only has been applied to total_order_seek

if (read_options.managed) {
return NewErrorIterator(
Status::NotSupported("Managed iterator is not supported anymore."));
Expand Down
3 changes: 2 additions & 1 deletion db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1669,7 +1669,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
TableProperties table_properties;
{
ScopedArenaPtr<InternalIterator> iter(
mem->NewIterator(ro, /*seqno_to_time_mapping=*/nullptr, &arena));
mem->NewIterator(ro, /*seqno_to_time_mapping=*/nullptr, &arena,
/*prefix_extractor=*/nullptr));
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
"[%s] [WriteLevel0TableForRecovery]"
" Level-0 table #%" PRIu64 ": started",
Expand Down
5 changes: 2 additions & 3 deletions db/db_iter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,8 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
valid_(false),
current_entry_is_merged_(false),
is_key_seqnum_zero_(false),
prefix_same_as_start_(mutable_cf_options.prefix_extractor
? read_options.prefix_same_as_start
: false),
prefix_same_as_start_(
prefix_extractor_ ? read_options.prefix_same_as_start : false),
pin_thru_lifetime_(read_options.pin_data),
expect_total_order_inner_iter_(prefix_extractor_ == nullptr ||
read_options.total_order_seek ||
Expand Down
2 changes: 2 additions & 0 deletions db/db_test2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5597,6 +5597,7 @@ TEST_F(DBTest2, PrefixBloomFilteredOut) {
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
bbto.whole_key_filtering = false;
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
options.prefix_seek_opt_in_only = false; // Use legacy prefix seek
DestroyAndReopen(options);

// Construct two L1 files with keys:
Expand Down Expand Up @@ -5987,6 +5988,7 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
// create a DB with block prefix index
BlockBasedTableOptions table_options;
Options options = CurrentOptions();
options.prefix_seek_opt_in_only = false; // Use legacy prefix seek

// Sometimes filter is checked based on upper bound. Assert counters
// for that case. Otherwise, only check data correctness.
Expand Down
Loading
Loading