From 0194dbaf4a1cf5833a1546b50c30459bc6994a31 Mon Sep 17 00:00:00 2001 From: Declan Kelly Date: Tue, 27 Aug 2024 12:23:01 -0700 Subject: [PATCH] [Capitalize CR title whose length should be < 72 chars] **Description** Fixes https://github.com/declanvk/blart/issues/19 This commit modifies the `IntoIter` implementation to be multiple steps: 1. Deallocate all the inner node of the trie, leaving only the leave nodes 2. Iterate through the leaf nodes by linked list and deallocate the leaf node and return the key-value pairs. 3. If there are any leaf nodes remaining on drop, deallocate all of them This commit also moves the tree dealloc operation into a separate module, and adds a couple variants of the dealloc operation to support the `IntoIter` modifications. **Motivation** This change makes the `IntoIter` more efficient, since we don't have to maintain a valid trie between each call to `next()`. **Testing Done** `./scripts/full-test.sh nightly` --- fuzz/fuzz_targets/fuzz_tree_map_api.rs | 88 +++++++++--- src/collections/map.rs | 3 + src/collections/map/iterators/into_iter.rs | 65 +++++++-- src/nodes/operations.rs | 58 +------- src/nodes/operations/deallocate.rs | 156 +++++++++++++++++++++ 5 files changed, 283 insertions(+), 87 deletions(-) create mode 100644 src/nodes/operations/deallocate.rs diff --git a/fuzz/fuzz_targets/fuzz_tree_map_api.rs b/fuzz/fuzz_targets/fuzz_tree_map_api.rs index 32bd69d7..17a4182a 100644 --- a/fuzz/fuzz_targets/fuzz_tree_map_api.rs +++ b/fuzz/fuzz_targets/fuzz_tree_map_api.rs @@ -1,13 +1,15 @@ #![feature(is_sorted)] #![no_main] -use blart::TreeMap; -use blart::map::Entry; -use blart::map::EntryRef; +use blart::{ + map::{Entry, EntryRef}, + TreeMap, +}; use libfuzzer_sys::arbitrary::{self, Arbitrary}; use std::{ collections::hash_map::RandomState, hash::{BuildHasher, Hash, Hasher}, + mem, }; #[derive(Arbitrary, Debug)] @@ -19,7 +21,7 @@ enum EntryAction { OrInsert, OrInsertWith, OrInsertWithKey, - RemoveEntry + RemoveEntry, } #[derive(Arbitrary, Debug)] @@ -42,6 +44,7 @@ enum Action { EntryRef(EntryAction, Box<[u8]>), Fuzzy(Box<[u8]>), Prefix(Box<[u8]>), + IntoIter(usize), } libfuzzer_sys::fuzz_target!(|actions: Vec| { @@ -123,7 +126,9 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { } }, Action::Clone => { - tree = tree.clone(); + let tree_copy = tree.clone(); + assert!(tree_copy.iter().eq(tree.iter())); + tree = tree_copy; }, Action::Hash => { let hash_builder = RandomState::new(); @@ -139,19 +144,35 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { let value = next_key; next_key += 1; match ea { - EntryAction::AndModify => {entry.and_modify(|v| *v = v.saturating_sub(1));}, - EntryAction::InsertEntry => {entry.insert_entry(value);}, - EntryAction::Key => {entry.key();}, - EntryAction::OrDefault => {entry.or_default();}, - EntryAction::OrInsert => {entry.or_insert(value);}, - EntryAction::OrInsertWith => {entry.or_insert_with(|| value);}, - EntryAction::OrInsertWithKey => {entry.or_insert_with_key(|_| value);}, + EntryAction::AndModify => { + entry.and_modify(|v| *v = v.saturating_sub(1)); + }, + EntryAction::InsertEntry => { + entry.insert_entry(value); + }, + EntryAction::Key => { + entry.key(); + }, + EntryAction::OrDefault => { + entry.or_default(); + }, + EntryAction::OrInsert => { + entry.or_insert(value); + }, + EntryAction::OrInsertWith => { + entry.or_insert_with(|| value); + }, + EntryAction::OrInsertWithKey => { + entry.or_insert_with_key(|_| value); + }, EntryAction::RemoveEntry => { match entry { - blart::map::Entry::Occupied(e) => {e.remove_entry();}, + blart::map::Entry::Occupied(e) => { + e.remove_entry(); + }, blart::map::Entry::Vacant(_) => {}, }; - } + }, }; } }, @@ -160,19 +181,35 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { let value = next_key; next_key += 1; match ea { - EntryAction::AndModify => {entry.and_modify(|v| *v = v.saturating_sub(1));}, - EntryAction::InsertEntry => {entry.insert_entry(value);}, - EntryAction::Key => {entry.key();}, - EntryAction::OrDefault => {entry.or_default();}, - EntryAction::OrInsert => {entry.or_insert(value);}, - EntryAction::OrInsertWith => {entry.or_insert_with(|| value);}, - EntryAction::OrInsertWithKey => {entry.or_insert_with_key(|_| value);}, + EntryAction::AndModify => { + entry.and_modify(|v| *v = v.saturating_sub(1)); + }, + EntryAction::InsertEntry => { + entry.insert_entry(value); + }, + EntryAction::Key => { + entry.key(); + }, + EntryAction::OrDefault => { + entry.or_default(); + }, + EntryAction::OrInsert => { + entry.or_insert(value); + }, + EntryAction::OrInsertWith => { + entry.or_insert_with(|| value); + }, + EntryAction::OrInsertWithKey => { + entry.or_insert_with_key(|_| value); + }, EntryAction::RemoveEntry => { match entry { - blart::map::EntryRef::Occupied(e) => {e.remove_entry();}, + blart::map::EntryRef::Occupied(e) => { + e.remove_entry(); + }, blart::map::EntryRef::Vacant(_) => {}, }; - } + }, }; } }, @@ -184,6 +221,11 @@ libfuzzer_sys::fuzz_target!(|actions: Vec| { let v: Vec<_> = tree.prefix(&key).collect(); std::hint::black_box(v); }, + Action::IntoIter(take) => { + let tree = mem::replace(&mut tree, TreeMap::<_, u32>::new()); + let num_entries = tree.into_iter().take(take).count(); + assert!(num_entries <= take); + }, } } }); diff --git a/src/collections/map.rs b/src/collections/map.rs index 137b38de..919bce1f 100644 --- a/src/collections/map.rs +++ b/src/collections/map.rs @@ -121,6 +121,9 @@ impl TreeMap { /// /// let root = TreeMap::into_raw(map); /// assert!(root.is_some()); + /// + /// // SAFETY: The root pointer came directly from the `into_raw` result. + /// let _map = unsafe { TreeMap::from_raw(root) }.unwrap(); /// ``` pub fn into_raw(tree: Self) -> Option> { // We need this `ManuallyDrop` so that the `TreeMap::drop` is not called. diff --git a/src/collections/map/iterators/into_iter.rs b/src/collections/map/iterators/into_iter.rs index 754594ec..383eb61b 100644 --- a/src/collections/map/iterators/into_iter.rs +++ b/src/collections/map/iterators/into_iter.rs @@ -1,4 +1,6 @@ -use crate::TreeMap; +use std::mem::ManuallyDrop; + +use crate::{deallocate_leaves, deallocate_tree_non_leaves, NodePtr, RawIterator, TreeMap}; /// An owning iterator over the entries of a `TreeMap`. /// @@ -7,11 +9,48 @@ use crate::TreeMap; /// /// [`into_iter`]: IntoIterator::into_iter /// [`IntoIterator`]: core::iter::IntoIterator -pub struct IntoIter(TreeMap); +#[derive(Debug)] +pub struct IntoIter { + inner: RawIterator, + size: usize, +} + +impl Drop for IntoIter { + fn drop(&mut self) { + if let Some(next) = unsafe { self.inner.next() } { + // SAFETY: TODO + unsafe { deallocate_leaves(next) } + } + + // Just to be safe, clear the iterator + self.inner = RawIterator::empty(); + self.size = 0; + } +} impl IntoIter { pub(crate) fn new(tree: TreeMap) -> Self { - IntoIter(tree) + // We need to inhibit `TreeMap::drop` since it would cause a double-free + // otherwise. + let tree = ManuallyDrop::new(tree); + + if let Some(state) = &tree.state { + let inner = unsafe { RawIterator::new(state.min_leaf, state.max_leaf) }; + + // SAFETY: Since this function takes an owned `TreeMap`, we can assume there is + // no concurrent read or modification, that this is a unique handle to the trie. + unsafe { deallocate_tree_non_leaves(state.root) } + + Self { + inner, + size: tree.num_entries, + } + } else { + Self { + inner: RawIterator::empty(), + size: 0, + } + } } } @@ -19,19 +58,29 @@ impl Iterator for IntoIter { type Item = (K, V); fn next(&mut self) -> Option { - // TODO(#19): Optimize `IntoIter` by not maintaining a valid tree throughout - // iteration - self.0.pop_first() + // SAFETY: TODO + let leaf_ptr = unsafe { self.inner.next() }; + + leaf_ptr.map(|leaf_ptr| { + // SAFETY: TODO + unsafe { NodePtr::deallocate_node_ptr(leaf_ptr) }.into_entry() + }) } fn size_hint(&self) -> (usize, Option) { - (self.0.len(), Some(self.0.len())) + (self.size, Some(self.size)) } } impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { - self.0.pop_last() + // SAFETY: TODO + let leaf_ptr = unsafe { self.inner.next_back() }; + + leaf_ptr.map(|leaf_ptr| { + // SAFETY: TODO + unsafe { NodePtr::deallocate_node_ptr(leaf_ptr) }.into_entry() + }) } } diff --git a/src/nodes/operations.rs b/src/nodes/operations.rs index 90e09efd..01fe85e8 100644 --- a/src/nodes/operations.rs +++ b/src/nodes/operations.rs @@ -1,7 +1,5 @@ //! Trie node lookup and manipulation -use crate::{ConcreteNodePtr, InnerNode, NodePtr, OpaqueNodePtr}; - mod insert; pub(crate) use insert::*; @@ -14,57 +12,5 @@ pub(crate) use lookup::*; mod delete; pub(crate) use delete::*; -/// Deallocate the given node and all children of the given node. -/// -/// This will also deallocate the leaf nodes with their value type data. -/// -/// # Safety -/// - This function must only be called once for this root node and all -/// descendants, otherwise a double-free could result. -pub unsafe fn deallocate_tree( - root: OpaqueNodePtr, -) { - fn deallocate_inner_node( - stack: &mut Vec>, - inner_ptr: NodePtr, - ) where - N: InnerNode, - { - { - // SAFETY: The scope of this reference is bounded and we enforce that no - // mutation of the reference memory takes place within the lifetime. The - // deallocation of the node happens outside of this block, after the lifetime - // ends. - let inner_node = unsafe { inner_ptr.as_ref() }; - - // SAFETY: This iterator only lives for this block, a subset of the shared - // lifetime of the `inner_node` variable. By the safety requirements of the - // `deallocate_tree` function, no other mutation of this node can happen while - // this iterator is live. - let iter = inner_node.iter(); - stack.extend(iter.map(|(_, child)| child)); - } - - // SAFETY: The single call per node requirement is enforced by the safety - // requirements on this function. - drop(unsafe { NodePtr::deallocate_node_ptr(inner_ptr) }); - } - - let mut stack = Vec::new(); - - stack.push(root); - - while let Some(next_node_ptr) = stack.pop() { - match next_node_ptr.to_node_ptr() { - ConcreteNodePtr::Node4(inner_ptr) => deallocate_inner_node(&mut stack, inner_ptr), - ConcreteNodePtr::Node16(inner_ptr) => deallocate_inner_node(&mut stack, inner_ptr), - ConcreteNodePtr::Node48(inner_ptr) => deallocate_inner_node(&mut stack, inner_ptr), - ConcreteNodePtr::Node256(inner_ptr) => deallocate_inner_node(&mut stack, inner_ptr), - ConcreteNodePtr::LeafNode(inner) => { - // SAFETY: The single call per node requirement is enforced by the safety - // requirements on this function. - drop(unsafe { NodePtr::deallocate_node_ptr(inner) }) - }, - } - } -} +mod deallocate; +pub(crate) use deallocate::*; diff --git a/src/nodes/operations/deallocate.rs b/src/nodes/operations/deallocate.rs new file mode 100644 index 00000000..9486e217 --- /dev/null +++ b/src/nodes/operations/deallocate.rs @@ -0,0 +1,156 @@ +use crate::{ConcreteNodePtr, InnerNode, LeafNode, NodePtr, OpaqueNodePtr}; + +/// Deallocate all the leaf nodes in the linked list starting from `start`. +/// +/// # Safety +/// - This function must only be called once for this `start` node and all +/// following leaf nodes in the linked list, otherwise a double-free could +/// result. +/// - This function should not be called concurrently with any read of the +/// tree, otherwise it could result in a use-after-free. +pub unsafe fn deallocate_leaves( + start: NodePtr>, +) { + let mut current = start; + + loop { + // SAFETY: Covered by function safety doc + let leaf = unsafe { NodePtr::deallocate_node_ptr(current) }; + + if let Some(next) = leaf.next { + current = next; + } else { + break; + } + } +} + +/// Deallocate the given node and all children of the given node except for +/// leaf nodes. +/// +/// # Safety +/// - This function must only be called once for this root node and all +/// descendants, otherwise a double-free could result. +/// - This function should not be called concurrently with any read of the +/// tree, otherwise it could result in a use-after-free. +pub unsafe fn deallocate_tree_non_leaves( + root: OpaqueNodePtr, +) { + fn not_leaf(node: &OpaqueNodePtr) -> bool { + !node.is::>() + } + + if root.is::>() { + return; + } + + let mut stack = Vec::new(); + + stack.push(root); + + while let Some(next_node_ptr) = stack.pop() { + match next_node_ptr.to_node_ptr() { + ConcreteNodePtr::Node4(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, not_leaf) + }, + ConcreteNodePtr::Node16(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, not_leaf) + }, + ConcreteNodePtr::Node48(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, not_leaf) + }, + ConcreteNodePtr::Node256(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, not_leaf) + }, + ConcreteNodePtr::LeafNode(_) => { + unreachable!("should never encounter a leaf node") + }, + } + } +} + +/// Deallocate the given node and all children of the given node. +/// +/// This will also deallocate the leaf nodes with their value type data. +/// +/// # Safety +/// - This function must only be called once for this root node and all +/// descendants, otherwise a double-free could result. +/// - This function should not be called concurrently with any read of the +/// tree, otherwise it could result in a use-after-free. +pub unsafe fn deallocate_tree( + root: OpaqueNodePtr, +) { + fn accept_all(_: &OpaqueNodePtr) -> bool { + true + } + + let mut stack = Vec::new(); + + stack.push(root); + + while let Some(next_node_ptr) = stack.pop() { + match next_node_ptr.to_node_ptr() { + ConcreteNodePtr::Node4(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, accept_all) + }, + ConcreteNodePtr::Node16(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, accept_all) + }, + ConcreteNodePtr::Node48(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, accept_all) + }, + ConcreteNodePtr::Node256(inner_ptr) => unsafe { + // SAFETY: Covered by function safety doc + deallocate_inner_node(&mut stack, inner_ptr, accept_all) + }, + ConcreteNodePtr::LeafNode(inner) => { + // SAFETY: The single call per node requirement is enforced by the safety + // requirements on this function. + drop(unsafe { NodePtr::deallocate_node_ptr(inner) }) + }, + } + } +} + +/// This function will read an [`InnerNode`] and place all children that +/// satisfy the `filter_children` predicate on the `stack`. Then it will +/// deallocate the given [`InnerNode`]. +/// +/// # Safety +/// - This function must only be called once for this inner node +/// - This function cannot be called concurrently with any read of modification +/// of the trie. +unsafe fn deallocate_inner_node( + stack: &mut Vec>, + inner_ptr: NodePtr, + filter_children: for<'a> fn(&'a OpaqueNodePtr) -> bool, +) where + N: InnerNode, +{ + { + // SAFETY: The scope of this reference is bounded and we enforce that no + // mutation of the reference memory takes place within the lifetime. The + // deallocation of the node happens outside of this block, after the lifetime + // ends. + let inner_node = unsafe { inner_ptr.as_ref() }; + + // SAFETY: This iterator only lives for this block, a subset of the shared + // lifetime of the `inner_node` variable. By the safety requirements of the + // `deallocate_inner_node` function, no other mutation of this node can happen + // while this iterator is live. + let iter = inner_node.iter(); + stack.extend(iter.map(|(_, child)| child).filter(filter_children)); + } + + // SAFETY: The single call per node requirement is enforced by the safety + // requirements on this function. + drop(unsafe { NodePtr::deallocate_node_ptr(inner_ptr) }); +}