Skip to content

Commit

Permalink
Merge pull request #2 from kampersanda/fix-cps
Browse files Browse the repository at this point in the history
Fix bug in common prefix search
  • Loading branch information
kampersanda authored Jul 10, 2022
2 parents b5b40e9 + 1fe88b1 commit 7534644
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 17 deletions.
30 changes: 21 additions & 9 deletions include/xcdat/tail_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <algorithm>
#include <functional>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
Expand Down Expand Up @@ -166,31 +167,42 @@ class tail_vector {
}
}

inline bool prefix_match(std::string_view key, std::uint64_t tpos) const {
assert(key.size() != 0);
std::uint64_t kpos = 0;
// Returns epos-tpos+1 if TAIL[tpos..epos] is a prefix of key.
inline std::optional<std::uint64_t> prefix_match(std::string_view key, std::uint64_t tpos) const {
if (tpos == 0) {
// suffix is empty, always matched.
return 0;
}
if (key.size() == 0) {
// When key is empty, match fails since the suffix is not empty here.
return std::nullopt;
}

std::uint64_t kpos = 0;
if (bin_mode()) {
do {
if (key[kpos] != m_chars[tpos]) {
return false;
return std::nullopt;
}
kpos += 1;
if (m_terms[tpos]) {
return kpos == key.size();
return kpos;
}
tpos += 1;
} while (kpos < key.size());
return true;
return kpos;
} else {
do {
if (!m_chars[tpos] || key[kpos] != m_chars[tpos]) {
return false;
if (!m_chars[tpos]) {
return kpos;
}
if (key[kpos] != m_chars[tpos]) {
return std::nullopt;
}
kpos += 1;
tpos += 1;
} while (kpos < key.size());
return true;
return kpos;
}
}

Expand Down
13 changes: 6 additions & 7 deletions include/xcdat/trie.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class trie {
class prefix_iterator {
private:
const trie_type* m_obj = nullptr;
std::string m_key;
std::string_view m_key;
std::uint64_t m_id = 0;
std::uint64_t m_kpos = 0;
std::uint64_t m_npos = 0;
Expand Down Expand Up @@ -231,7 +231,7 @@ class trie {

private:
const trie_type* m_obj = nullptr;
std::string m_key;
std::string_view m_key;
std::uint64_t m_id = 0;
std::string m_decoded;
std::vector<cursor_type> m_stack;
Expand Down Expand Up @@ -317,8 +317,7 @@ class trie {
: m_num_keys(b.m_keys.size()), m_table(std::move(b.m_table)), m_terms(b.m_terms, true, true),
m_bcvec(b.m_units, std::move(b.m_leaves)), m_tvec(std::move(b.m_suffixes)) {}

template <class String>
static constexpr String get_suffix(const String& s, std::uint64_t i) {
static constexpr std::string_view get_suffix(std::string_view s, std::uint64_t i) {
assert(i <= s.size());
return s.substr(i, s.size() - i);
}
Expand Down Expand Up @@ -376,12 +375,12 @@ class trie {
itr->is_end = true;

const std::uint64_t tpos = m_bcvec.link(itr->m_npos);
if (!m_tvec.match(get_suffix(itr->m_key, itr->m_kpos), tpos)) {
const auto matched = m_tvec.prefix_match(get_suffix(itr->m_key, itr->m_kpos), tpos);
if (!matched.has_value()) {
itr->m_id = num_keys();
return false;
}

itr->m_kpos = itr->m_key.size();
itr->m_kpos += matched.value();
itr->m_id = npos_to_id(itr->m_npos);
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_trie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ TEST_CASE("Test " TRIE_NAME " (tiny)") {
test_basic_operations(trie, keys, others);

{
auto itr = trie.make_prefix_iterator("MacBook_Pro");
auto itr = trie.make_prefix_iterator("MacBook_Pro_13inch");
std::vector<std::string> expected = {"Mac", "MacBook", "MacBook_Pro"};
for (const auto& exp : expected) {
REQUIRE(itr.next());
Expand Down

0 comments on commit 7534644

Please sign in to comment.