Skip to content

reverse scan apiの制限つき実装 #46

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions include/interface_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ template<class ValueType>
scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
std::string_view r_key, scan_endpoint r_end,
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>*
node_version_vec,
std::size_t max_size) {
std::vector<std::pair<node_version64_body, node_version64*>>* node_version_vec,
std::size_t max_size,
bool right_to_left = false) {

/**
* Prohibition : std::string_view{nullptr, non-zero value}.
*/
Expand All @@ -45,6 +46,13 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
return status::ERR_BAD_USAGE;
}

/**
* currently right_to_left is restricted to unbounded scan with max_size == 1
*/
if (right_to_left && (r_end != scan_endpoint::INF || max_size != 1)) {
return status::ERR_BAD_USAGE;
}

retry_from_root:
// clear out parameter, this must be after retry_from_root for retry.
tuple_list.clear();
Expand Down Expand Up @@ -73,6 +81,12 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
traverse_key_view.size());
}
}
if (right_to_left) {
// assuming r_end == scan_endpoint::INF
// put maximum value of key_slice
key_slice = ~key_slice_type{0};
key_slice_length = sizeof(key_slice_type);
}
/**
* traverse tree to border node.
*/
Expand Down Expand Up @@ -108,7 +122,7 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
check_status = scan_border<ValueType>(
&target_border, traverse_key_view, l_end, r_key, r_end,
tuple_list, std::get<tuple_v_index>(node_and_v),
node_version_vec, key_prefix, max_size);
node_version_vec, key_prefix, max_size, right_to_left);

// check rc, success
if (check_status == status::OK_SCAN_END) { return status::OK; }
Expand All @@ -133,14 +147,15 @@ scan(std::string_view storage_name, std::string_view l_key, // NOLINT
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>*
node_version_vec = nullptr,
std::size_t max_size = 0) {
std::size_t max_size = 0,
bool right_to_left = false) {
// check storage
tree_instance* ti{};
if (storage::find_storage(storage_name, &ti) != status::OK) {
return status::WARN_STORAGE_NOT_EXIST;
}
return scan(ti, l_key, l_end, r_key, r_end, tuple_list, node_version_vec,
max_size);
max_size, right_to_left);
}

} // namespace yakushima
7 changes: 6 additions & 1 deletion include/kvs.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ put(Token token, std::string_view storage_name, // NOLINT
* node_version_vec to make sure the values are not overwritten. This advantage
* is effective when the right end point is unknown but you want to scan to a
* specific value.
* @param[in] right_to_left If this argument is true, the scan is performed from the right end.
* When this is set to true, the current implementation has the following limitations:
* 1. max_size must be 1, so that at most one entry is hit and returned as the scan result;
* 2. r_end must be scan_endpoint::INF, so that the scan is performed from the unbounded right end.
* Status::ERR_BAD_USAGE is returned if these conditions are not met.
* @return Status::ERR_BAD_USAGE The arguments are invalid. Case 1: you use the
* same l_key and r_key and one of the endpoints is exclusive. Case 2: one of the
* endpoints uses a null key but the string size is not zero, like
Expand All @@ -272,6 +276,7 @@ scan(std::string_view storage_name, std::string_view l_key, // NOLINT
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>*
node_version_vec,
std::size_t max_size);
std::size_t max_size,
bool right_to_left);

} // namespace yakushima
21 changes: 15 additions & 6 deletions include/scan_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ scan(base_node* const root, const std::string_view l_key,
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>* const
node_version_vec,
const std::string& key_prefix, const std::size_t max_size) {
const std::string& key_prefix, const std::size_t max_size, bool right_to_left) {
/**
* Log size before scanning this node.
* This must be before retry label for retry at find border.
Expand Down Expand Up @@ -106,6 +106,12 @@ scan(base_node* const root, const std::string_view l_key,
} else {
if (!l_key.empty()) { memcpy(&ks, l_key.data(), l_key.size()); }
}
if (right_to_left) {
// assuming r_end == scan_endpoint::INF
// put maximum value of key_slice
ks = ~key_slice_type{0};
kl = sizeof(key_slice_type);
}
node_and_v = find_border(root, ks, kl, check_status);
if (check_status == status::WARN_RETRY_FROM_ROOT_OF_ALL) {
return status::OK_RETRY_AFTER_FB;
Expand All @@ -124,7 +130,7 @@ scan(base_node* const root, const std::string_view l_key,
// scan the border node
check_status = scan_border<ValueType>(
&bn, l_key, l_end, r_key, r_end, tuple_list, check_v,
node_version_vec, key_prefix, max_size);
node_version_vec, key_prefix, max_size, right_to_left);

// check rc, success
if (check_status == status::OK_SCAN_END) { return status::OK; }
Expand Down Expand Up @@ -170,7 +176,7 @@ scan_border(border_node** const target, const std::string_view l_key,
node_version64_body& v_at_fb,
std::vector<std::pair<node_version64_body, node_version64*>>* const
node_version_vec,
const std::string& key_prefix, const std::size_t max_size) {
const std::string& key_prefix, const std::size_t max_size, bool right_to_left) {
/**
* Log size before scanning this node.
* This must be before retry label for retry at find border.
Expand Down Expand Up @@ -215,16 +221,19 @@ scan_border(border_node** const target, const std::string_view l_key,
border_node* bn = *target;
/**
* next node pointer must be logged before optimistic verify.
* When right_to_left is true, we stop at the first border node and don't use this.
* TODO When we extend reverse scan for multiple entries, we need get_prev() here.
*/
border_node* next = bn->get_next();

/**
* get permutation at once.
* After scan border, optimistic verify support this is atomic.
*/
permutation perm(bn->get_permutation().get_body());
// check all elements in border node.
for (std::size_t i = 0; i < perm.get_cnk(); ++i) {
std::size_t index = perm.get_index_of_rank(i);
for (std::size_t i = 0, n = perm.get_cnk(); i < n; ++i) {
std::size_t index = perm.get_index_of_rank(right_to_left ? n-i-1 : i);
key_slice_type ks = bn->get_key_slice_at(index);
key_length_type kl = bn->get_key_length_at(index);
std::string full_key{key_prefix};
Expand Down Expand Up @@ -314,7 +323,7 @@ scan_border(border_node** const target, const std::string_view l_key,
}
check_status =
scan(next_layer, arg_l_key, arg_l_end, arg_r_key, arg_r_end,
tuple_list, node_version_vec, full_key, max_size);
tuple_list, node_version_vec, full_key, max_size, right_to_left);
if (check_status != status::OK) {
// failed. clean up tuple list and node version vec.
clean_up_tuple_list_nvc();
Expand Down
235 changes: 235 additions & 0 deletions test/scan/scan_reverse_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
/**
* @file scan_reverse_test.cpp
*/

#include <array>

#include "gtest/gtest.h"

#include "kvs.h"

using namespace yakushima;

namespace yakushima::testing {

std::string st{"s"}; // NOLINT

/**
 * Fixture shared by the reverse (right-to-left) scan tests.
 * SetUp calls init() and creates the storage named by the file-level `st`;
 * TearDown calls fin().
 */
class scan_reverse_test : public ::testing::Test {
    void SetUp() override {
        init();
        // create_storage reports a status; check it here so that a failed
        // setup is reported as such instead of surfacing later as an
        // unrelated put/scan failure inside the test body.
        ASSERT_EQ(status::OK, create_storage(st));
    }

    void TearDown() override { fin(); }
};

/**
 * Returns a view of the key stored in a scan result tuple (element 0).
 * The view refers to the string owned by @a t and is valid only while @a t lives.
 */
std::string_view key(std::tuple<std::string, char*, std::size_t> const& t) {
    std::string const& stored_key = std::get<0>(t);
    return std::string_view{stored_key};
}

/**
 * Returns a view of the value referenced by a scan result tuple:
 * element 1 is the pointer to the value bytes, element 2 is their length.
 * The view does not own the bytes.
 */
std::string_view value(std::tuple<std::string, char*, std::size_t> const& t) {
    char const* const bytes = std::get<1>(t);
    std::size_t const length = std::get<2>(t);
    return {bytes, length};
}

/**
 * Puts two keys, then checks that a reverse scan limited to one entry yields
 * the larger key ("k1"), and that the argument combinations rejected by the
 * current reverse scan (r_end != INF, max_size != 1) give ERR_BAD_USAGE.
 */
TEST_F(scan_reverse_test, basic_usage) { // NOLINT
    std::string k0{"k0"};
    std::string k1{"k1"};
    std::string v0{"v0"};
    std::string v1{"v1"};

    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    ASSERT_EQ(status::OK, put(token, st, k0, v0.data(), v0.size()));
    ASSERT_EQ(status::OK, put(token, st, k1, v1.data(), v1.size()));

    std::vector<std::tuple<std::string, char*, std::size_t>> tup_lis{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;

    // All scans below use empty keys and right_to_left == true; only the
    // endpoints and max_size vary.
    auto const reverse_scan = [&tup_lis, &nv](scan_endpoint l_end,
                                              scan_endpoint r_end,
                                              std::size_t max_size) {
        return scan<char>(st, "", l_end, "", r_end, tup_lis, &nv, max_size,
                          true);
    };

    // Exactly one hit, one node version per hit, and the hit carries v1.
    auto const only_v1_returned = [&tup_lis, &nv, &v1]() -> bool {
        if (tup_lis.size() != 1 || nv.size() != tup_lis.size()) {
            return false;
        }
        auto const& hit = tup_lis.front();
        return std::string_view{std::get<1>(hit), std::get<2>(hit)} ==
               std::string_view{v1};
    };

    ASSERT_EQ(status::OK,
              reverse_scan(scan_endpoint::INF, scan_endpoint::INF, 1));
    ASSERT_TRUE(only_v1_returned());
    ASSERT_EQ(tup_lis.size(), 1U);
    EXPECT_EQ(key(tup_lis[0]), k1);
    EXPECT_EQ(value(tup_lis[0]), v1);

    ASSERT_EQ(status::OK,
              reverse_scan(scan_endpoint::INCLUSIVE, scan_endpoint::INF, 1));
    ASSERT_EQ(tup_lis.size(), 1U);
    EXPECT_EQ(key(tup_lis[0]), k1);
    EXPECT_EQ(value(tup_lis[0]), v1);

    // currently max_size must be 1 and r_end == INF for reverse scan
    ASSERT_EQ(status::ERR_BAD_USAGE,
              reverse_scan(scan_endpoint::INCLUSIVE, scan_endpoint::INCLUSIVE,
                           1));
    ASSERT_EQ(status::ERR_BAD_USAGE,
              reverse_scan(scan_endpoint::INCLUSIVE, scan_endpoint::INF, 0));

    ASSERT_EQ(leave(token), status::OK);
}

/**
 * Reverse scan on a storage that holds no entries: the call must succeed and
 * must not report any entry.
 */
TEST_F(scan_reverse_test, scan_results_zero) { // NOLINT
    Token s{};
    ASSERT_EQ(status::OK, enter(s));
    std::vector<std::tuple<std::string, char*, std::size_t>> tup_lis{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup_lis, &nv, 1, true));
    // Nothing was put into the storage, so the "zero results" this test is
    // named after must actually be observed, not just a successful status.
    EXPECT_TRUE(tup_lis.empty());
    ASSERT_EQ(status::OK, leave(s));
}

/**
 * Reverse scan with keys from 1 KiB to 30 KiB long. In each round a key of
 * 'a's and a key of 'b's of the current length are put into a dedicated
 * storage, and the reverse scan must return the 'b' key just inserted.
 */
TEST_F(scan_reverse_test, long_key_scan) { // NOLINT
    // prepare
    Token s{};
    ASSERT_EQ(status::OK, enter(s));
    // Dedicated storage for this test (named differently from the file-level st).
    std::string storage{"test"};
    ASSERT_EQ(status::OK, create_storage(storage));

    constexpr std::size_t kib = 1024;
    constexpr std::size_t max_kib = 30;
    for (std::size_t n = 1; n <= max_kib; ++n) { // NOLINT
        std::size_t const key_len = n * kib;

        // put
        LOG(INFO) << "test key size " << key_len / kib << " KiB";
        std::string k0(key_len, 'a');
        std::string k1(key_len, 'b');
        std::string v0{"v0"};
        std::string v1{"v1"};
        ASSERT_EQ(status::OK, put(s, storage, k0, v0.data(), v0.size()));
        ASSERT_EQ(status::OK, put(s, storage, k1, v1.data(), v1.size()));

        // test: scan
        std::vector<std::tuple<std::string, char*, std::size_t>> tup_lis{}; // NOLINT
        std::vector<std::pair<node_version64_body, node_version64*>> nv;
        ASSERT_EQ(status::OK,
                  scan<char>(storage, "", scan_endpoint::INF, "",
                             scan_endpoint::INF, tup_lis, &nv, 1, true));
        ASSERT_EQ(tup_lis.size(), 1U);
        EXPECT_EQ(key(tup_lis[0]), k1);
        EXPECT_EQ(value(tup_lis[0]), v1);
    }

    // cleanup
    ASSERT_EQ(status::OK, leave(s));
}

/**
 * Reverse scan over the eight one-byte keys 0x00..0x07: the single returned
 * entry must be key 0x07 with value "v7".
 */
TEST_F(scan_reverse_test, scan_single_border) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);

    constexpr char last_key = 7;
    for (char c = 0; c <= last_key; ++c) { // NOLINT
        // value is "v" followed by the decimal key, e.g. "v7"
        std::string v = "v" + std::to_string(c);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * expected layout: one border node
     * 0, 1, 2, 3, 4, 5, 6, 7
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1U);        // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT

    auto const& hit = tup[0];
    EXPECT_EQ(key(hit), "\x07");
    EXPECT_EQ(value(hit), "v7");

    ASSERT_EQ(leave(token), status::OK);
}

/**
 * Reverse scan over the one-byte keys 0x00..0x10, which are expected to be
 * spread over two border nodes: the single returned entry must be the largest
 * key, 0x10, with value "v16".
 */
TEST_F(scan_reverse_test, scan_two_borders) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    for (char i = 0; i <= 16; ++i) { // NOLINT
        char c = i;
        std::string v{"v"};
        v += std::to_string(i);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * border nodes
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15, 16
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1);         // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT

    EXPECT_EQ(key(tup[0]), "\x10");
    EXPECT_EQ(value(tup[0]), "v16");

    // Pair the enter() above with a leave(); the session was previously
    // left entered when the test finished.
    ASSERT_EQ(leave(token), status::OK);
}

/**
 * Reverse scan over the one-byte keys 0x00..0x19, which are expected to be
 * spread over three border nodes: the single returned entry must be the
 * largest key, 0x19, with value "v25".
 */
TEST_F(scan_reverse_test, scan_three_borders) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    for (char i = 0; i <= 25; ++i) { // NOLINT
        char c = i;
        std::string v{"v"};
        v += std::to_string(i);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * now
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15,
     * C: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
     * branch of A and B is 8
     * branch of B and C is 16
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1);         // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT
    EXPECT_EQ(key(tup[0]), "\x19");
    EXPECT_EQ(value(tup[0]), "v25");

    // Pair the enter() above with a leave(); the session was previously
    // left entered when the test finished.
    ASSERT_EQ(leave(token), status::OK);
}

/**
 * Reverse scan after the largest key has been removed: with keys 0x00..0x19
 * inserted and 0x19 then removed, the single returned entry must be 0x18
 * with value "v24".
 */
TEST_F(scan_reverse_test, scan_three_borders_removed_last) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);

    constexpr char last_key = 25;
    for (char c = 0; c <= last_key; ++c) { // NOLINT
        std::string v = "v" + std::to_string(c);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * now
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15,
     * C: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
     * branch of A and B is 8
     * branch of B and C is 16
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;

    // Scan once before the removal; only the result count is checked here.
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1U);        // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT

    // Removes every one-byte key in [first, last]. A failing assertion
    // returns from this lambda only, not from the test body.
    auto const remove_keys = [&token](char first, char last) {
        for (char c = first; c <= last; ++c) {
            ASSERT_EQ(status::OK, remove(token, st, std::string_view(&c, 1)));
        }
    };
    remove_keys(last_key, last_key); // NOLINT
    /**
     * now
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15,
     * C: 16, 17, 18, 19, 20, 21, 22, 23, 24
     */
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1U); // NOLINT
    EXPECT_EQ(key(tup[0]), "\x18");
    EXPECT_EQ(value(tup[0]), "v24");

    ASSERT_EQ(leave(token), status::OK);
}

} // namespace yakushima::testing