Skip to content

Commit

Permalink
mpt: Remove explicit extended node kind
Browse files Browse the repository at this point in the history
Notice that the formally specified extended node in
the Merkle Patricia Trie always leads to a branch node.
Therefore, we can treat branch nodes as having a potentially non-empty
"extended path" and remove the explicit extended node kind.
This significantly simplifies the implementation.
  • Loading branch information
chfast committed Jan 6, 2024
1 parent 9af0b26 commit 11af3f5
Showing 1 changed file with 56 additions and 100 deletions.
156 changes: 56 additions & 100 deletions test/state/mpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ namespace evmone::state
namespace
{
/// The MPT node kind.
enum class Kind : uint8_t
enum class Kind : bool
{
leaf,
ext,
branch
};

Expand Down Expand Up @@ -59,15 +58,17 @@ class Path

[[nodiscard]] bytes encode(Kind kind) const
{
assert(kind == Kind::leaf || kind == Kind::ext);
if (kind == Kind::branch && m_size == 0) // FIXME: Can leaf have empty path?
return {};

const auto kind_prefix = kind == Kind::leaf ? 0x20 : 0x00;
const auto has_odd_size = m_size % 2 != 0;
const auto nibble_prefix = has_odd_size ? (0x10 | m_nibbles[0]) : 0x00;

bytes encoded{static_cast<uint8_t>(kind_prefix | nibble_prefix)};
for (auto i = size_t{has_odd_size}; i < m_size; i += 2)
encoded.push_back(static_cast<uint8_t>((m_nibbles[i] << 4) | m_nibbles[i + 1]));
return encoded;
return rlp::encode(encoded);
}
};
} // namespace
Expand All @@ -90,39 +91,19 @@ class MPTNode
: m_kind{kind}, m_path{path}, m_value{std::move(value)}
{}

/// Creates an extended node.
static MPTNode ext(const Path& path, std::unique_ptr<MPTNode> child) noexcept
{
assert(child->m_kind == Kind::branch);
MPTNode node{Kind::ext, path};
node.m_children[0] = std::move(child);
return node;
}

/// Optionally wraps the child node with newly created extended node in case
/// the provided path is not empty.
static std::unique_ptr<MPTNode> optional_ext(
const Path& path, std::unique_ptr<MPTNode> child) noexcept
{
return (!path.empty()) ? std::make_unique<MPTNode>(ext(path, std::move(child))) :
std::move(child);
}

/// Creates a branch node out of two children and optionally extends it with an extended
/// node in case the path is not empty.
static MPTNode ext_branch(const Path& path, size_t idx1, std::unique_ptr<MPTNode> child1,
static MPTNode branch(const Path& path, size_t idx1, std::unique_ptr<MPTNode> child1,
size_t idx2, std::unique_ptr<MPTNode> child2) noexcept
{
assert(idx1 != idx2);
assert(idx1 < num_children);
assert(idx2 < num_children);

MPTNode br{Kind::branch};
MPTNode br{Kind::branch, path};
br.m_children[idx1] = std::move(child1);
br.m_children[idx2] = std::move(child2);

return (!path.empty()) ? ext(path, std::make_unique<MPTNode>(std::move(br))) :
std::move(br);
return br;
}

public:
Expand All @@ -143,108 +124,83 @@ void MPTNode::insert(const Path& path, bytes&& value) // NOLINT(misc-no-recursi
{
// The insertion is all about branch nodes. In the happy case we will find an empty slot
// in an existing branch node. Otherwise, we need to create a new branch node
// (possibly with an adjusted extended node) and transform existing nodes around it.

const auto [this_idx, insert_idx] = std::ranges::mismatch(m_path, path);

// insert_idx is always valid if requirements are fulfilled:
// (possibly with an extended path) and transform existing nodes around it.

// Let's consider the following branch node with extended path "ab".
//
//     |
//     |a ↙③
//     |b
//     |
// [a|b|c|d]
//  |     ②
//  ①
//
// If the insert path prefix matches the "ab" we insert to one of the children:
// - e.g. for "aba" insert into existing child ①,
// - e.g. for "abd" create new leaf node ②.
// If the insert path prefix doesn't match "ab" we split the extended path with
// a new branch node whose children are the "this" branch node and a new leaf node.
// E.g. for "acd" insert new branch node "a" at ③ with:
// - at "b" : the "this" branch node with empty extended path "",
// - at "c" : the new leaf node with path "d".

const auto [this_idx_it, insert_idx_it] = std::ranges::mismatch(m_path, path);

// insert_idx_it is always valid if requirements are fulfilled:
// - if m_path is not shorter than path they must have mismatched nibbles,
// given the requirement of key uniqueness and not being a prefix of an existing key,
// - if m_path is shorter and matches the path prefix
// then insert_idx points at path[m_path.size()].
assert(insert_idx != path.end() && "a key must not be a prefix of another key");

const Path common{m_path.begin(), this_idx};
const Path insert_tail{insert_idx + 1, path.end()};
// then insert_idx_it points at path[m_path.size()].
assert(insert_idx_it != path.end() && "a key must not be a prefix of another key");
const Path insert_tail{insert_idx_it + 1, path.end()};

switch (m_kind)
{
case Kind::branch:
if (m_kind == Kind::branch && this_idx_it == m_path.end()) // Paths match: go into the child.
{
assert(m_path.empty()); // Branch has no path.
if (auto& child = m_children[*insert_idx]; child)
child->insert(insert_tail, std::move(value));
if (auto& child = m_children[*insert_idx_it]; child)
child->insert(insert_tail, std::move(value)); //
else
child = leaf(insert_tail, std::move(value));
break;
child = leaf(insert_tail, std::move(value)); //
}

case Kind::ext:
else // ③: Shorten path of this node and insert it to the new branch node.
{
assert(!m_path.empty()); // Ext must have non-empty path.
if (this_idx == m_path.end()) // Paths match: go into the child.
return m_children[0]->insert({insert_idx, path.end()}, std::move(value));

// The original branch node must be pushed down, possibly extended with
// the adjusted extended node if the path split point is not directly at the branch node.
// Clang Analyzer bug: https://github.com/llvm/llvm-project/issues/47814
// NOLINTNEXTLINE(clang-analyzer-cplusplus.NewDeleteLeaks)
auto this_branch = optional_ext({this_idx + 1, m_path.end()}, std::move(m_children[0]));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, *this_idx, std::move(this_branch), *insert_idx, std::move(new_leaf));
break;
}

case Kind::leaf:
{
assert(!m_path.empty()); // Leaf must have non-empty path.
assert(this_idx != m_path.end() && "a key must be unique");
auto this_leaf = leaf({this_idx + 1, m_path.end()}, std::move(m_value));
auto new_leaf = leaf(insert_tail, std::move(value));
*this =
ext_branch(common, *this_idx, std::move(this_leaf), *insert_idx, std::move(new_leaf));
break;
}

default:
assert(false);
const Path extended_path{m_path.begin(), this_idx_it};
const auto this_idx = *this_idx_it;
m_path = Path{this_idx_it + 1, m_path.end()}; // shorten this path, invalidates this_idx_it
*this = branch(extended_path, this_idx, std::make_unique<MPTNode>(std::move(*this)),
*insert_idx_it, leaf(insert_tail, std::move(value)));
}
}

/// Encodes a node and optionally hashes the encoded bytes
/// if their length exceeds the specified threshold.
static bytes encode_child(const MPTNode& child) noexcept // NOLINT(misc-no-recursion)
{
if (auto e = child.encode(); e.size() < 32)
return e; // "short" node
else
return rlp::encode(keccak256(e));
}

bytes MPTNode::encode() const // NOLINT(misc-no-recursion)
{
bytes encoded;
static constexpr auto shorten = [](bytes&& b) {
return (b.size() < 32) ? std::move(b) : rlp::encode(keccak256(b));
};

bytes encoded; // the encoded content of the node without its path
switch (m_kind)
{
case Kind::leaf:
{
encoded = rlp::encode(m_path.encode(m_kind)) + rlp::encode(m_value);
encoded = rlp::encode(m_value);
break;
}
case Kind::branch:
{
assert(m_path.empty());
static constexpr uint8_t empty = 0x80; // encoded empty child

for (const auto& child : m_children)
{
if (child)
encoded += encode_child(*child);
else
encoded += empty;
}
encoded += child ? shorten(child->encode()) : bytes{empty};
encoded += empty; // end indicator
break;
}
case Kind::ext:
{
encoded = rlp::encode(m_path.encode(m_kind)) + encode_child(*m_children[0]);

if (!m_path.empty()) // extended node
encoded = shorten(rlp::internal::wrap_list(encoded));
break;
}
}

return rlp::internal::wrap_list(encoded);
return rlp::internal::wrap_list(m_path.encode(m_kind) + encoded);
}


Expand Down

0 comments on commit 11af3f5

Please sign in to comment.