From bdc1a2d9b0b4c7b5978a4df088cfb15bf99469cf Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Tue, 4 Feb 2025 20:38:15 -0600 Subject: [PATCH] refactor: put node hasher in charge of labeling nodes and determining kinds --- benchtop/src/nomt.rs | 2 +- core/src/proof/multi_proof.rs | 36 ++++++- core/src/proof/path_proof.rs | 6 +- core/src/trie.rs | 116 ++++++---------------- core/src/update.rs | 41 ++++++-- examples/witness_verification/src/main.rs | 34 ++++++- fuzz/fuzz_targets/api_surface.rs | 4 +- nomt/src/lib.rs | 37 ++++++- nomt/src/merkle/page_walker.rs | 14 +-- nomt/src/merkle/seek.rs | 8 +- nomt/tests/add_remove.rs | 2 +- nomt/tests/common/mod.rs | 5 +- nomt/tests/compute_root.rs | 17 +++- nomt/tests/rollback.rs | 2 +- nomt/tests/witness_check.rs | 2 +- 15 files changed, 193 insertions(+), 133 deletions(-) diff --git a/benchtop/src/nomt.rs b/benchtop/src/nomt.rs index ba397270..1bf7df35 100644 --- a/benchtop/src/nomt.rs +++ b/benchtop/src/nomt.rs @@ -1,7 +1,7 @@ use crate::{backend::Transaction, timer::Timer, workload::Workload}; use fxhash::FxHashMap; use nomt::{ - Blake3Hasher, KeyPath, KeyReadWrite, Nomt, Options, Overlay, Session, SessionParams, + trie::KeyPath, Blake3Hasher, KeyReadWrite, Nomt, Options, Overlay, Session, SessionParams, WitnessMode, }; use sha2::Digest; diff --git a/core/src/proof/multi_proof.rs b/core/src/proof/multi_proof.rs index afa79e4f..c6a51c4a 100644 --- a/core/src/proof/multi_proof.rs +++ b/core/src/proof/multi_proof.rs @@ -7,7 +7,7 @@ use crate::{ path_proof::{hash_path, shared_bits}, KeyOutOfScope, PathProof, PathProofTerminal, }, - trie::{InternalData, KeyPath, LeafData, Node, NodeHasher, NodeHasherExt, TERMINATOR}, + trie::{InternalData, KeyPath, LeafData, Node, NodeHasher, TERMINATOR}, }; #[cfg(not(feature = "std"))] @@ -498,7 +498,7 @@ mod tests { use crate::{ proof::{PathProof, PathProofTerminal}, - trie::{self, InternalData, LeafData, NodeHasher, NodeHasherExt, TERMINATOR}, + trie::{self, InternalData, LeafData, Node, NodeHasher, NodeKind, TERMINATOR}, trie_pos::TriePosition, }; @@ -888,8 +888,36 @@ mod tests { pub struct Blake3Hasher; impl NodeHasher for Blake3Hasher { - fn hash_node(data: &trie::NodePreimage) -> [u8; 32] { - blake3::hash(data).into() + fn hash_leaf(data: &trie::LeafData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.key_path); + hasher.update(&data.value_hash); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] |= 0b10000000; + hash + } + + fn hash_internal(data: &trie::InternalData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.left); + hasher.update(&data.right); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] &= 0b01111111; + hash + } + + fn node_kind(node: &Node) -> NodeKind { + if node[0] >> 7 == 1 { + NodeKind::Leaf + } else if node == &TERMINATOR { + NodeKind::Terminator + } else { + NodeKind::Internal + } } } diff --git a/core/src/proof/path_proof.rs b/core/src/proof/path_proof.rs index fe4a40ec..78ad0f4d 100644 --- a/core/src/proof/path_proof.rs +++ b/core/src/proof/path_proof.rs @@ -1,8 +1,6 @@ //! Proving and verifying inclusion, non-inclusion, and updates to the trie. -use crate::trie::{ - self, InternalData, KeyPath, LeafData, Node, NodeHasher, NodeHasherExt, NodeKind, TERMINATOR, -}; +use crate::trie::{self, InternalData, KeyPath, LeafData, Node, NodeHasher, NodeKind, TERMINATOR}; use crate::trie_pos::TriePosition; use bitvec::prelude::*; @@ -295,7 +293,7 @@ pub fn verify_update( *sibling }; - match (NodeKind::of(&cur_node), NodeKind::of(&sibling)) { + match (NodeKind::of::(&cur_node), NodeKind::of::(&sibling)) { (NodeKind::Terminator, NodeKind::Terminator) => {} (NodeKind::Leaf, NodeKind::Terminator) => {} (NodeKind::Terminator, NodeKind::Leaf) => { diff --git a/core/src/trie.rs b/core/src/trie.rs index c68f5ffa..c89de806 100644 --- a/core/src/trie.rs +++ b/core/src/trie.rs @@ -13,10 +13,11 @@ //! //! All node preimages are 512 bits. -/// A node in the binary trie. In this schema, it is always 256 bits and is the hash of an -/// underlying structure, represented as [`NodePreimage`]. +/// A node in the binary trie. In this schema, it is always 256 bits and is the hash of either +/// an [`LeafData`] or [`InternalData`], or zeroed if it's a [`TERMINATOR`]. /// -/// The first bit of the [`Node`] is used to indicate the kind of node +/// [`Node`]s are labeled by the [`NodeHasher`] used to indicate whether they are leaves or internal +/// nodes. Typically, this is done by setting the MSB. pub type Node = [u8; 32]; /// The path to a key. All paths have a 256 bit fixed length. @@ -25,12 +26,6 @@ pub type KeyPath = [u8; 32]; /// The hash of a value. In this schema, it is always 256 bits. pub type ValueHash = [u8; 32]; -/// The preimage of a node. In this schema, it is always 512 bits. -/// -/// Note that this encoding itself does not contain information about -/// whether the node is a leaf or internal node. -pub type NodePreimage = [u8; 64]; - /// The terminator hash is a special node hash value denoting an empty sub-tree. /// Concretely, when this appears at a given location in the trie, /// it implies that no key with a path beginning with the location has a value. @@ -39,18 +34,18 @@ pub type NodePreimage = [u8; 64]; pub const TERMINATOR: Node = [0u8; 32]; /// Whether the node hash indicates the node is a leaf. -pub fn is_leaf(hash: &Node) -> bool { - hash[0] >> 7 == 1 +pub fn is_leaf(hash: &Node) -> bool { + H::node_kind(hash) == NodeKind::Leaf } /// Whether the node hash indicates the node is an internal node. -pub fn is_internal(hash: &Node) -> bool { - hash[0] >> 7 == 0 && !is_terminator(&hash) +pub fn is_internal(hash: &Node) -> bool { + H::node_kind(hash) == NodeKind::Internal } /// Whether the node holds the special `EMPTY_SUBTREE` value. -pub fn is_terminator(hash: &Node) -> bool { - hash == &TERMINATOR +pub fn is_terminator(hash: &Node) -> bool { + H::node_kind(hash) == NodeKind::Terminator } /// The kind of a node. @@ -66,14 +61,8 @@ pub enum NodeKind { impl NodeKind { /// Get the kind of the provided node. - pub fn of(node: &Node) -> Self { - if is_leaf(node) { - NodeKind::Leaf - } else if is_terminator(node) { - NodeKind::Terminator - } else { - NodeKind::Internal - } + pub fn of(node: &Node) -> Self { + H::node_kind(node) } } @@ -86,16 +75,6 @@ pub struct InternalData { pub right: Node, } -impl InternalData { - /// Encode the internal node. - pub fn encode(&self) -> NodePreimage { - let mut node = [0u8; 64]; - node[0..32].copy_from_slice(&self.left[..]); - node[32..64].copy_from_slice(&self.right[..]); - node - } -} - /// The data of a leaf node. #[derive(Debug, Default, Clone, PartialEq, Eq)] #[cfg_attr( @@ -112,63 +91,24 @@ pub struct LeafData { pub value_hash: ValueHash, } -impl LeafData { - /// Encode the leaf node. - pub fn encode(&self) -> NodePreimage { - let mut node = [0u8; 64]; - self.encode_into(&mut node[..]); - node - } - - /// Encode the leaf node into the given slice. It must have length at least 64 or this panics. - pub fn encode_into(&self, buf: &mut [u8]) { - buf[0..32].copy_from_slice(&self.key_path[..]); - buf[32..64].copy_from_slice(&self.value_hash[..]); - } - - /// Decode the leaf node. Fails if the provided slice is not 64 bytes. - pub fn decode(buf: &[u8]) -> Option { - if buf.len() != 64 { - None - } else { - let mut leaf = LeafData { - key_path: Default::default(), - value_hash: Default::default(), - }; - leaf.key_path.copy_from_slice(&buf[..32]); - leaf.value_hash.copy_from_slice(&buf[32..]); - Some(leaf) - } - } -} - /// A trie node hash function specialized for 64 bytes of data. +/// +/// Note that it is illegal for the produced hash to equal [0; 32], as this value is reserved +/// for the terminator node. +/// +/// A node hasher should domain-separate internal and leaf nodes in some specific way. The +/// recommended approach for binary hashes is to set the MSB to 0 or 1 depending on the node kind. +/// However, for other kinds of hashes (e.g. Poseidon2 or other algebraic hashes), other labeling +/// schemes may be required. pub trait NodeHasher { - /// Hash a node, encoded as exactly 64 bytes of data. This should not - /// domain-separate the hash. - fn hash_node(data: &NodePreimage) -> [u8; 32]; -} + /// Hash a leaf. This should domain-separate the hash + /// according to the node kind. + fn hash_leaf(data: &LeafData) -> [u8; 32]; -pub trait NodeHasherExt: NodeHasher { - /// Hash an internal node. This returns a domain-separated hash. - fn hash_internal(internal: &InternalData) -> Node { - let data = internal.encode(); - let mut hash = Self::hash_node(&data); + /// Hash an internal node. This should domain-separate + /// the hash according to the node kind. + fn hash_internal(data: &InternalData) -> [u8; 32]; - // set msb to 0. - hash[0] &= 0b01111111; - hash - } - - /// Hash a leaf node. This returns a domain-separated hash. - fn hash_leaf(leaf: &LeafData) -> Node { - let data = leaf.encode(); - let mut hash = Self::hash_node(&data); - - // set msb to 1 - hash[0] |= 0b10000000; - hash - } + /// Get the kind of the given node. + fn node_kind(node: &Node) -> NodeKind; } - -impl NodeHasherExt for T {} diff --git a/core/src/update.rs b/core/src/update.rs index 488b8e6a..4d8a6f43 100644 --- a/core/src/update.rs +++ b/core/src/update.rs @@ -1,6 +1,6 @@ //! Trie update logic helpers. -use crate::trie::{self, KeyPath, LeafData, Node, NodeHasher, NodeHasherExt, ValueHash}; +use crate::trie::{self, KeyPath, LeafData, Node, NodeHasher, ValueHash}; use bitvec::prelude::*; @@ -252,16 +252,43 @@ pub fn build_trie( #[cfg(test)] mod tests { - use super::{ - bitvec, build_trie, trie, BitVec, LeafData, Msb0, Node, NodeHasher, NodeHasherExt, - WriteNode, - }; + use crate::trie::{NodeKind, TERMINATOR}; + + use super::{bitvec, build_trie, trie, BitVec, LeafData, Msb0, Node, NodeHasher, WriteNode}; struct DummyNodeHasher; impl NodeHasher for DummyNodeHasher { - fn hash_node(data: &trie::NodePreimage) -> [u8; 32] { - blake3::hash(data).into() + fn hash_leaf(data: &trie::LeafData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.key_path); + hasher.update(&data.value_hash); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] |= 0b10000000; + hash + } + + fn hash_internal(data: &trie::InternalData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.left); + hasher.update(&data.right); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] &= 0b01111111; + hash + } + + fn node_kind(node: &Node) -> NodeKind { + if node[0] >> 7 == 1 { + NodeKind::Leaf + } else if node == &TERMINATOR { + NodeKind::Terminator + } else { + NodeKind::Internal + } } } diff --git a/examples/witness_verification/src/main.rs b/examples/witness_verification/src/main.rs index 8d92fe29..f8bd3b8d 100644 --- a/examples/witness_verification/src/main.rs +++ b/examples/witness_verification/src/main.rs @@ -1,7 +1,7 @@ use anyhow::Result; use nomt_core::{ proof, - trie::{LeafData, NodeHasher}, + trie::{self, LeafData, Node, NodeHasher, NodeKind, TERMINATOR}, }; // Hash nodes using blake3. The Hasher below will be utilized in the witness @@ -10,8 +10,36 @@ use nomt_core::{ pub struct Blake3Hasher; impl NodeHasher for Blake3Hasher { - fn hash_node(data: &nomt_core::trie::NodePreimage) -> [u8; 32] { - blake3::hash(data).into() + fn hash_leaf(data: &trie::LeafData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.key_path); + hasher.update(&data.value_hash); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] |= 0b10000000; + hash + } + + fn hash_internal(data: &trie::InternalData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.left); + hasher.update(&data.right); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] &= 0b01111111; + hash + } + + fn node_kind(node: &Node) -> NodeKind { + if node[0] >> 7 == 1 { + NodeKind::Leaf + } else if node == &TERMINATOR { + NodeKind::Terminator + } else { + NodeKind::Internal + } } } diff --git a/fuzz/fuzz_targets/api_surface.rs b/fuzz/fuzz_targets/api_surface.rs index 0f29a21e..ef9950a8 100644 --- a/fuzz/fuzz_targets/api_surface.rs +++ b/fuzz/fuzz_targets/api_surface.rs @@ -5,7 +5,9 @@ use std::collections::{HashMap, HashSet}; use arbitrary::Arbitrary; use libfuzzer_sys::fuzz_target; -use nomt::{Blake3Hasher, KeyPath, KeyReadWrite, Nomt, Options, SessionParams, Value, WitnessMode}; +use nomt::{ + trie::KeyPath, Blake3Hasher, KeyReadWrite, Nomt, Options, SessionParams, Value, WitnessMode, +}; fuzz_target!(|run: Run| { let db = open_db(run.commit_concurrency); diff --git a/nomt/src/lib.rs b/nomt/src/lib.rs index a7165526..757c940d 100644 --- a/nomt/src/lib.rs +++ b/nomt/src/lib.rs @@ -11,7 +11,7 @@ use merkle::{UpdatePool, Updater}; use nomt_core::{ page_id::ROOT_PAGE_ID, proof::PathProof, - trie::{self, InternalData, NodeHasher, NodeHasherExt, ValueHash, TERMINATOR}, + trie::{InternalData, KeyPath, LeafData, Node, NodeHasher, NodeKind, ValueHash, TERMINATOR}, trie_pos::TriePosition, }; use overlay::{LiveOverlay, OverlayMarker}; @@ -22,7 +22,7 @@ use store::{Store, ValueTransaction}; // CARGO HACK: silence lint; this is used in integration tests pub use nomt_core::proof; -pub use nomt_core::trie::{KeyPath, LeafData, Node, NodeKind, NodePreimage}; +pub use nomt_core::trie; pub use options::{Options, PanicOnSyncMode}; pub use overlay::{InvalidAncestors, Overlay}; pub use store::HashTableUtilization; @@ -777,11 +777,38 @@ pub trait ValueHasher { pub struct Blake3Hasher; impl NodeHasher for Blake3Hasher { - fn hash_node(data: &NodePreimage) -> [u8; 32] { - blake3::hash(data).into() + fn hash_leaf(data: &trie::LeafData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.key_path); + hasher.update(&data.value_hash); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] |= 0b10000000; + hash + } + + fn hash_internal(data: &trie::InternalData) -> [u8; 32] { + let mut hasher = blake3::Hasher::new(); + hasher.update(&data.left); + hasher.update(&data.right); + let mut hash: [u8; 32] = hasher.finalize().into(); + + // Label with MSB + hash[0] &= 0b01111111; + hash } -} + fn node_kind(node: &Node) -> NodeKind { + if node[0] >> 7 == 1 { + NodeKind::Leaf + } else if node == &TERMINATOR { + NodeKind::Terminator + } else { + NodeKind::Internal + } + } +} impl ValueHasher for Blake3Hasher { fn hash_value(data: &[u8]) -> [u8; 32] { blake3::hash(data).into() diff --git a/nomt/src/merkle/page_walker.rs b/nomt/src/merkle/page_walker.rs index aa485f2a..6c549501 100644 --- a/nomt/src/merkle/page_walker.rs +++ b/nomt/src/merkle/page_walker.rs @@ -46,7 +46,7 @@ use bitvec::prelude::*; use nomt_core::{ page::DEPTH, page_id::{PageId, ROOT_PAGE_ID}, - trie::{self, KeyPath, Node, NodeHasher, NodeHasherExt, NodeKind, ValueHash, TERMINATOR}, + trie::{self, KeyPath, Node, NodeHasher, NodeKind, ValueHash, TERMINATOR}, trie_pos::TriePosition, update::WriteNode, }; @@ -230,7 +230,7 @@ impl PageWalker { self.prev_node = Some(node); - assert!(!trie::is_internal(&node)); + assert!(!trie::is_internal::(&node)); let start_position = self.position.clone(); @@ -247,9 +247,9 @@ impl PageWalker { // we assume pages are not necessarily zeroed. therefore, there might be // some garbage in the sibling slot we need to clear out. let zero_sibling = if self.position.peek_last_bit() { - trie::is_terminator(&internal_data.left) + trie::is_terminator::(&internal_data.left) } else { - trie::is_terminator(&internal_data.right) + trie::is_terminator::(&internal_data.right) }; if zero_sibling { @@ -454,7 +454,7 @@ impl PageWalker { let sibling = self.sibling_node(); let bit = self.position.peek_last_bit(); - match (NodeKind::of(&node), NodeKind::of(&sibling)) { + match (NodeKind::of::(&node), NodeKind::of::(&sibling)) { (NodeKind::Terminator, NodeKind::Terminator) => { // compact terminators. trie::TERMINATOR @@ -602,8 +602,8 @@ impl PageWalker { #[cfg(test)] mod tests { use super::{ - trie, BucketInfo, Node, NodeHasherExt, Output, PageSet, PageWalker, TriePosition, - UpdatedPage, ROOT_PAGE_ID, + trie, BucketInfo, Node, NodeHasher, Output, PageSet, PageWalker, TriePosition, UpdatedPage, + ROOT_PAGE_ID, }; use crate::{ io::PagePool, diff --git a/nomt/src/merkle/seek.rs b/nomt/src/merkle/seek.rs index 9dd77f26..6c58f900 100644 --- a/nomt/src/merkle/seek.rs +++ b/nomt/src/merkle/seek.rs @@ -47,9 +47,9 @@ impl SeekRequest { key: KeyPath, root: Node, ) -> SeekRequest { - let state = if trie::is_terminator(&root) { + let state = if trie::is_terminator::(&root) { RequestState::Completed(None) - } else if trie::is_leaf(&root) { + } else if trie::is_leaf::(&root) { RequestState::begin_leaf_fetch::(read_transaction, overlay, &TriePosition::new()) } else { RequestState::Seeking @@ -133,14 +133,14 @@ impl SeekRequest { self.siblings.push(page.node(self.position.sibling_index())); } - if trie::is_leaf(&cur_node) { + if trie::is_leaf::(&cur_node) { self.state = RequestState::begin_leaf_fetch::(read_transaction, overlay, &self.position); if let RequestState::FetchingLeaf(_, _) = self.state { do_leaf_fetch = true; } break; - } else if trie::is_terminator(&cur_node) { + } else if trie::is_terminator::(&cur_node) { self.state = RequestState::Completed(None); return; } diff --git a/nomt/tests/add_remove.rs b/nomt/tests/add_remove.rs index 65fc9dfb..48647fcb 100644 --- a/nomt/tests/add_remove.rs +++ b/nomt/tests/add_remove.rs @@ -2,7 +2,7 @@ mod common; use common::Test; use hex_literal::hex; -use nomt::Node; +use nomt::trie::Node; #[test] fn add_remove_1000() { diff --git a/nomt/tests/common/mod.rs b/nomt/tests/common/mod.rs index ce2d835a..b595a042 100644 --- a/nomt/tests/common/mod.rs +++ b/nomt/tests/common/mod.rs @@ -1,6 +1,7 @@ use nomt::{ - KeyPath, KeyReadWrite, Node, Nomt, Options, Overlay, PanicOnSyncMode, Root, Session, - SessionParams, Witness, WitnessMode, + trie::{KeyPath, Node}, + KeyReadWrite, Nomt, Options, Overlay, PanicOnSyncMode, Root, Session, SessionParams, Witness, + WitnessMode, }; use std::{ collections::{hash_map::Entry, HashMap}, diff --git a/nomt/tests/compute_root.rs b/nomt/tests/compute_root.rs index 21c5ee92..ab005910 100644 --- a/nomt/tests/compute_root.rs +++ b/nomt/tests/compute_root.rs @@ -1,13 +1,16 @@ mod common; use common::Test; -use nomt::NodeKind; +use nomt::{trie::NodeKind, Blake3Hasher}; #[test] fn root_on_empty_db() { let t = Test::new("compute_root_empty"); let root = t.root(); - assert_eq!(NodeKind::of(&root.into_inner()), NodeKind::Terminator); + assert_eq!( + NodeKind::of::(&root.into_inner()), + NodeKind::Terminator + ); } #[test] @@ -20,7 +23,10 @@ fn root_on_leaf() { let t = Test::new_with_params("compute_root_leaf", 1, 1, None, false); let root = t.root(); - assert_eq!(NodeKind::of(&root.into_inner()), NodeKind::Leaf); + assert_eq!( + NodeKind::of::(&root.into_inner()), + NodeKind::Leaf + ); } #[test] @@ -34,5 +40,8 @@ fn root_on_internal() { let t = Test::new_with_params("compute_root_internal", 1, 1, None, false); let root = t.root(); - assert_eq!(NodeKind::of(&root.into_inner()), NodeKind::Internal); + assert_eq!( + NodeKind::of::(&root.into_inner()), + NodeKind::Internal + ); } diff --git a/nomt/tests/rollback.rs b/nomt/tests/rollback.rs index cfd4f225..b2742b2a 100644 --- a/nomt/tests/rollback.rs +++ b/nomt/tests/rollback.rs @@ -1,5 +1,5 @@ use hex_literal::hex; -use nomt::{Blake3Hasher, KeyPath, KeyReadWrite, Nomt, Options, SessionParams, Value}; +use nomt::{trie::KeyPath, Blake3Hasher, KeyReadWrite, Nomt, Options, SessionParams, Value}; use std::{ collections::{BTreeMap, BTreeSet}, path::PathBuf, diff --git a/nomt/tests/witness_check.rs b/nomt/tests/witness_check.rs index df2ca4f7..38a6d000 100644 --- a/nomt/tests/witness_check.rs +++ b/nomt/tests/witness_check.rs @@ -1,7 +1,7 @@ mod common; use common::Test; -use nomt::{proof, Blake3Hasher, LeafData}; +use nomt::{proof, trie::LeafData, Blake3Hasher}; #[test] fn produced_witness_validity() {