Skip to content

Commit

Permalink
Fix the sorting of KeyContexts for batched MultiGet (#8633)
Browse files Browse the repository at this point in the history
Summary:
`CompareKeyContext::operator()` on the trunk has a bug: when comparing
column family IDs, `lhs` is used for both sides of the comparison. This
results in the `KeyContext`s getting sorted solely based on key, which
in turn means that keys with the same column family do not necessarily
form a single range in the sorted list. This violates an assumption of the
batched `MultiGet` logic, leading to the same column family
showing up multiple times in the list of `MultiGetColumnFamilyData`.
The end result is the code attempting to check out the thread-local
`SuperVersion` for the same CF multiple times, causing an
assertion violation in debug builds and memory corruption/crash in
release builds.

Pull Request resolved: #8633

Test Plan: `make check`

Reviewed By: riversand963

Differential Revision: D30169182

Pulled By: ltamasi

fbshipit-source-id: a47710652df7e95b14b40fb710924c11a8478023
  • Loading branch information
ltamasi committed Aug 9, 2021
1 parent 53da604 commit 37c4996
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 36 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
## 6.23.3 (2021-08-04)
### Bug Fixes
* Removed a call to `RenameFile()` on a non-existent info log file ("LOG") when opening a new DB. Such a call was guaranteed to fail, though it did not impact applications since we swallowed the error. We have also stopped swallowing errors when renaming the "LOG" file.
* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`.

## 6.23.2 (2021-08-04)
### Bug Fixes
Expand Down
22 changes: 22 additions & 0 deletions db/db_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,28 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) {
}
}

// Checks that MultiGet returns the correct value for every key when the keys
// of one column family are not adjacent in the request.
TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) {
  Options options = CurrentOptions();
  CreateAndReopenWithCF({"one", "two"}, options);

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(2, "baz", "xyz"));
  ASSERT_OK(Put(1, "abc", "def"));

  // The request interleaves the column families (1, 2, 1), so the two keys
  // that live in CF 1 are separated by a key from CF 2.
  std::vector<int> cf_indices{1, 2, 1};
  std::vector<std::string> lookup_keys{"foo", "baz", "abc"};

  // The test parameter is passed through as the `batched` argument.
  const bool batched = GetParam();
  const std::vector<std::string> results =
      MultiGet(cf_indices, lookup_keys, /* snapshot */ nullptr, batched);

  // Results must come back in request order, one per key.
  ASSERT_EQ(results.size(), 3);
  ASSERT_EQ(results[0], "bar");
  ASSERT_EQ(results[1], "xyz");
  ASSERT_EQ(results[2], "def");
}

INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
testing::Bool());

Expand Down
55 changes: 19 additions & 36 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2276,20 +2276,18 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
multiget_cf_data;
size_t cf_start = 0;
ColumnFamilyHandle* cf = sorted_keys[0]->column_family;

for (size_t i = 0; i < num_keys; ++i) {
KeyContext* key_ctx = sorted_keys[i];
if (key_ctx->column_family != cf) {
multiget_cf_data.emplace_back(
MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr));
multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr);
cf_start = i;
cf = key_ctx->column_family;
}
}
{
// multiget_cf_data.emplace_back(
// MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr));
multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
}

multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);

std::function<MultiGetColumnFamilyData*(
autovector<MultiGetColumnFamilyData,
MultiGetContext::MAX_BATCH_SIZE>::iterator&)>
Expand Down Expand Up @@ -2349,7 +2347,7 @@ struct CompareKeyContext {
static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
uint32_t cfd_id1 = cfh->cfd()->GetID();
const Comparator* comparator = cfh->cfd()->user_comparator();
cfh = static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
cfh = static_cast<ColumnFamilyHandleImpl*>(rhs->column_family);
uint32_t cfd_id2 = cfh->cfd()->GetID();

if (cfd_id1 < cfd_id2) {
Expand All @@ -2373,39 +2371,24 @@ struct CompareKeyContext {
// Puts the key contexts of a batched MultiGet into the order defined by
// CompareKeyContext. The batched MultiGet logic assumes that keys of the same
// column family form a single contiguous range in this list.
//
// num_keys     - number of leading entries of `sorted_keys` to sort.
// sorted_input - true if the caller states the keys are already in order; in
//                that case nothing is reordered.
// sorted_keys  - the key contexts, reordered in place when sorting is needed.
void DBImpl::PrepareMultiGetKeys(
    size_t num_keys, bool sorted_input,
    autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) {
  if (sorted_input) {
#ifndef NDEBUG
    // Debug builds verify the caller's claim with the same comparator that is
    // used for sorting, so the check and the sort cannot drift apart.
    CompareKeyContext key_context_less;

    for (size_t index = 1; index < sorted_keys->size(); ++index) {
      const KeyContext* const lhs = (*sorted_keys)[index - 1];
      const KeyContext* const rhs = (*sorted_keys)[index];

      // lhs should be <= rhs, or in other words, rhs should NOT be < lhs
      assert(!key_context_less(rhs, lhs));
    }
#endif

    return;
  }

  std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
            CompareKeyContext());
}

void DBImpl::MultiGet(const ReadOptions& read_options,
Expand Down

0 comments on commit 37c4996

Please sign in to comment.