diff --git a/betree/src/replication/tree.rs b/betree/src/replication/tree.rs index 6300fc4a..c73c976d 100644 --- a/betree/src/replication/tree.rs +++ b/betree/src/replication/tree.rs @@ -1,77 +1,155 @@ -use owning_ref::OwningRef; use parking_lot::{RwLock, RwLockReadGuard}; use pmem_hashmap::allocator::{Pal, PalError, PalPtr}; -/// A basic BTree implementation using PalPtr. -/// -/// - // Order of a BTree -const M: usize = 5; +const B: usize = 16; -struct Node { - values: [Option<(K, V)>; M], - // Fine granular locking, could be a way to do some more efficient inserts *while* reading from the tree. - child: [Child>; M + 1], +pub struct Node { + pivots: [Option; B - 1], + children: [Option>; B], } -enum Child { - Leaf, - Node(PalPtr), +impl std::fmt::Debug for Node { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Node") + .field("pivots", &self.pivots) + .field( + "children", + &self + .children + .iter() + .filter(|e| e.is_some()) + .filter_map(|e| match e.as_ref().unwrap() { + Link::Entry(val) => None, + Link::Child(n) => Some(n.load()), + }) + .collect::>>(), + ) + .finish() + } } +#[derive(Debug)] +pub enum Link { + Entry(V), + Child(PalPtr>), +} + +#[derive(Debug)] pub struct PBTree { root: PalPtr>, } -impl PBTree { - pub fn new(pal: Pal) -> Result { +impl PBTree { + pub fn new(pal: &Pal) -> Result { let mut root = pal.allocate(std::mem::size_of::>())?; root.init(&Node::new(), std::mem::size_of::>()); Ok(Self { root }) } - pub fn get(&self, key: &K) -> Option<&(K, V)> { + pub fn get(&self, key: &K) -> Option<&V> { let mut node = &self.root; loop { match node.load().walk(key) { NodeWalk::Miss => return None, - NodeWalk::Found(idx) => return node.load().get(idx).as_ref(), - NodeWalk::Child(idx) => match node.load().child.get(idx).unwrap() { - Child::Node(ref n) => node = n, - Child::Leaf => unreachable!(), - }, + NodeWalk::Found(idx) => return node.load().get(idx), + NodeWalk::Child(idx) => { + match node.load().children.get(idx).unwrap().as_ref().unwrap() { + Link::Entry(_) => unreachable!(), + Link::Child(ref n) => node = n, + // Child::Node(ref n) => node = n, + // Child::Leaf => unreachable!(), + } + } } } } - pub fn insert(&mut self, key: K, val: V) { - let mut node = &mut self.root; + pub fn insert(&mut self, key: K, val: V, pal: &Pal) { + if let Some((k, v, n)) = self.insert_from(key, val, pal, self.root.clone()) { + assert!(self.insert_from(k, v, pal, n).is_none()); + } + } + + fn insert_from( + &mut self, + key: K, + val: V, + pal: &Pal, + mut from: PalPtr>, + ) -> Option<(K, V, PalPtr>)> { + let mut node = &mut from; let mut path = vec![]; loop { path.push(node.clone()); match node.load().walk(&key) { NodeWalk::Miss => { - if let Some((left, median, right)) = node.load_mut().insert(key, val) { - // Deal with adjacent nodes - todo!(); - for node in path.into_iter().rev() {} + if let Some((median, new_node, value)) = + node.load_mut().insert(key.clone(), val) + { + let mut pair = Some((median, new_node)).map(|(key, new_node)| { + // Allocate the new node + let mut ptr = pal.allocate(std::mem::size_of::>()).unwrap(); + ptr.init(&new_node, std::mem::size_of::>()); + (key, ptr) + }); + for mut cur_node in path.iter_mut().rev().skip(1) { + if let Some((key, new_node)) = pair { + pair = cur_node.load_mut().escalate(key, new_node).map( + |(key, new_node)| { + // Allocate the new node + let mut ptr = pal + .allocate(std::mem::size_of::>()) + .unwrap(); + ptr.init(&new_node, std::mem::size_of::>()); + (key, ptr) + }, + ); + node = cur_node; + } else { + break; + } + } + + // Create a new root node + if let Some((key, new_node)) = pair { + let mut new_root = Node::new(); + new_root.pivots[0] = Some(key); + new_root.children[0] = Some(Link::Child(self.root)); + new_root.children[1] = Some(Link::Child(new_node)); + let mut ptr = pal.allocate(std::mem::size_of::>()).unwrap(); + ptr.init(&new_root, std::mem::size_of::>()); + self.root = ptr; + node = &mut self.root; + } + return Some((key, value, node.clone())); } - return; + return None; } NodeWalk::Found(idx) => { - node.load_mut() - .values + node.load_mut().children.get_mut(idx).unwrap().as_mut().map( + |entry| match entry { + Link::Entry(ref mut v) => *v = val, + Link::Child(_) => unreachable!(), + }, + ); + return None; + } + NodeWalk::Child(idx) => { + match node + .load_mut() + .children .get_mut(idx) .unwrap() .as_mut() - .map(|entry| entry.1 = val); - return; + .unwrap() + { + Link::Entry(_) => unreachable!(), + Link::Child(ref mut n) => node = n, + // Child::Node(ref mut n) => node = n, + // Child::Leaf => unreachable!(), + } } - NodeWalk::Child(idx) => match node.load_mut().child.get_mut(idx).unwrap() { - Child::Node(ref mut n) => node = n, - Child::Leaf => unreachable!(), - }, } } } @@ -83,102 +161,283 @@ enum NodeWalk { Child(usize), } -impl Node { +impl Node { pub fn new() -> Self { + // Node { + // values: [0; B].map(|_| None), + // child: [0; B + 1].map(|_| Child::Leaf), + // } Node { - values: [0; M].map(|_| None), - child: [0; M + 1].map(|_| Child::Leaf), + pivots: [0; B - 1].map(|_| None), + children: [0; B].map(|_| None), } } - pub fn walk(&self, key: &K) -> NodeWalk { - for pos in 0..M { - if let Some(ref pair) = self.values[pos] { - if pair.0 == *key { - return NodeWalk::Found(pos); - } - if pair.0 < *key { - return match self.child[pos] { - Child::Leaf => NodeWalk::Miss, - Child::Node(_) => NodeWalk::Child(pos), - }; - } - } else { - break; + fn walk(&self, key: &K) -> NodeWalk { + let mut idx = 0; + let pos = loop { + if idx >= B - 1 { + break B - 1; } - } - match self.child[M] { - Child::Leaf => NodeWalk::Miss, - Child::Node(_) => NodeWalk::Child(M), + if self.pivots[idx].is_none() { + break idx; + } + if self.pivots[idx].as_ref().unwrap() == key { + // Inspect Child + return match self.children[idx].as_ref().unwrap() { + Link::Entry(_) => NodeWalk::Found(idx), + Link::Child(_) => NodeWalk::Child(idx), + }; + } + if self.pivots[idx].as_ref().unwrap() > key { + break idx; + } + idx += 1; + }; + + // let found = self + // .pivots + // .iter() + // .enumerate() + // .filter(|p| p.1.is_some()) + // .find(|p| p.1.as_ref().unwrap() >= key) + // .unwrap_or((B - 1, &None)); + match self.children[pos] { + Some(ref ptr) => match ptr { + Link::Entry(_) => NodeWalk::Miss, + Link::Child(ref child) => NodeWalk::Child(idx), + }, + None => NodeWalk::Miss, } } - pub fn insert(&mut self, key: K, value: V) -> Option<(Node, (K, V), Node)> { - if self.values.last().is_some() { - // TODO: Split the node and insert value - let mut res = self.split_at(M / 2); - if key <= res.1 .0 { - assert!(res.0.insert(key, value).is_none()); - } else { - assert!(res.2.insert(key, value).is_none()); - } - Some(res) + pub fn insert(&mut self, key: K, value: V) -> Option<(K, Node, V)> { + if self.pivots.last().unwrap().is_none() { + // Enough space + // let entry = self + // .pivots + // .iter_mut() + // .zip(self.children.iter_mut()) + // .find(|e| e.0.is_none()) + // .expect("Has to exist"); + // *entry.0 = Some(key); + // *entry.1 = Some(Link::Entry(value)); + self.splice(key, value); + None } else { - // Insert entry into remaining space - for entry in self.values.iter_mut() { - if entry.is_none() { - *entry = Some((key, value)); + // Split the node and escalate + let (new_key, mut right) = self.split(); + // assert!(right.insert(key, value).is_none()); + Some((new_key, right, value)) + } + } + + pub fn split(&mut self) -> (K, Node) { + let mut right = Self::new(); + let idx = (B as f32 / 2f32).ceil() as usize; + for (right, left) in right + .pivots + .iter_mut() + .zip(self.pivots[idx + 1..].iter_mut()) + { + *right = left.take(); + } + for (right, left) in right + .children + .iter_mut() + .zip(self.children[idx + 1..].iter_mut()) + { + *right = left.take(); + } + + (self.pivots[idx].as_ref().cloned().unwrap(), right) + } + + pub fn escalate(&mut self, key: K, right: PalPtr>) -> Option<(K, Node)> { + if self.pivots.last().unwrap().is_none() { + // Shift pivot and child + + let mut idx = 0; + let mut k = Some(key); + let mut c = Some(Link::Child(right)); + while idx < B - 1 { + if self.pivots[idx].is_none() { + self.pivots[idx] = k; + self.children[idx + 1] = c; break; } + if self.pivots[idx].is_some() && &self.pivots[idx] > &k { + // shift idx and replace with key + // shift children and replace with right + std::mem::swap(&mut self.pivots[idx], &mut k); + std::mem::swap(&mut self.children[idx + 1], &mut c); + } + idx += 1; } None + } else { + let (upper, mut new_right) = self.split(); + // assert!(new_right.escalate(key, right).is_none()); + Some((upper, new_right)) } } - pub fn get(&self, idx: usize) -> &Option<(K, V)> { - self.values.get(idx).unwrap() + pub fn get(&self, idx: usize) -> Option<&V> { + match self.children[idx].as_ref().unwrap() { + Link::Entry(ref v) => Some(v), + Link::Child(_) => None, + } } pub fn remove(&mut self, key: K) -> Option<(K, V)> { todo!() } - pub fn splice_at(&mut self, kv: (K, V), idx: usize) { - assert!(idx > 0); - assert!(idx < M + 1); - assert!(self.values[M - 1].is_none()); - for cur in (idx..M).rev() { - self.values[cur] = self.values[cur - 1].take(); - } - } + pub fn splice(&mut self, mut key: K, mut val: V) { + assert!(self.pivots.last().unwrap().is_none()); - // Move left and right section of keys down to the - pub fn split_at(&mut self, idx: usize) -> (Node, (K, V), Node) { - let mut left = Self::new(); - let mut right = Self::new(); - let mut cur = 0; + let mut key_entry = Some(key); + let mut val_entry = Some(Link::Entry(val)); - for (pos, c) in left.values.iter_mut().zip(left.child.iter_mut()) { - if cur > idx { + for entry in self.pivots.iter_mut().zip(self.children.iter_mut()) { + if let Some(other_key) = entry.0 { + if other_key > key_entry.as_mut().unwrap() { + std::mem::swap(&mut key_entry, entry.0); + std::mem::swap(entry.1, &mut val_entry) + } + } else { + *entry.0 = key_entry; + *entry.1 = val_entry; break; } - *pos = self.values[cur].take(); - *c = std::mem::replace(&mut self.child[cur], Child::Leaf); - cur += 1; } + } - let median = self.values[cur].take().unwrap(); - cur += 1; + // // Move left and right section of keys down to the + // pub fn split_at(&mut self, idx: usize) -> (Node, (K, V), Node) { + // let mut left = Self::new(); + // let mut right = Self::new(); + // let mut cur = 0; - for (pos, c) in right.values.iter_mut().zip(right.child.iter_mut()) { - if cur == M { - break; + // for (pos, c) in left.values.iter_mut().zip(left.child.iter_mut()) { + // if cur > idx { + // break; + // } + // *pos = self.values[cur].take(); + // *c = std::mem::replace(&mut self.child[cur], Child::Leaf); + // cur += 1; + // } + + // let median = self.values[cur].take().unwrap(); + // cur += 1; + + // for (pos, c) in right.values.iter_mut().zip(right.child.iter_mut()) { + // if cur == B { + // break; + // } + // *pos = self.values[cur].take(); + // *c = std::mem::replace(&mut self.child[cur], Child::Leaf); + // cur += 1; + // } + // right.child[cur - idx + 1] = std::mem::replace(&mut self.child[cur], Child::Leaf); + + // (left, median, right) + // } +} + +#[cfg(test)] +mod tests { + use super::*; + use pmem_hashmap::allocator::Pal; + use std::{collections::HashSet, path::PathBuf, process::Command}; + use tempfile::Builder; + + struct TestFile(PathBuf); + + impl TestFile { + pub fn new() -> Self { + TestFile( + Builder::new() + .tempfile() + .expect("Could not get tmpfile") + .path() + .to_path_buf(), + ) + } + + pub fn path(&self) -> &PathBuf { + &self.0 + } + } + impl Drop for TestFile { + fn drop(&mut self) { + if !Command::new("rm") + .arg(self.0.to_str().expect("Could not pass tmpfile")) + .output() + .expect("Could not delete") + .status + .success() + { + eprintln!("Could not delete tmpfile"); } - *pos = self.values[cur].take(); - *c = std::mem::replace(&mut self.child[cur], Child::Leaf); - cur += 1; } - right.child[cur - idx + 1] = std::mem::replace(&mut self.child[cur], Child::Leaf); + } + + #[test] + fn new() { + let file = TestFile::new(); + let mut pal = Pal::create(file.path(), 32 * 1024 * 1024, 0o666).unwrap(); + let tree: PBTree = PBTree::new(&pal).unwrap(); + } + + #[test] + fn basic_insert() { + let file = TestFile::new(); + let mut pal = Pal::create(file.path(), 32 * 1024 * 1024, 0o666).unwrap(); + let mut tree: PBTree = PBTree::new(&pal).unwrap(); + tree.insert(1, 1, &pal); + } + + #[test] + fn basic_get() { + let file = TestFile::new(); + let mut pal = Pal::create(file.path(), 32 * 1024 * 1024, 0o666).unwrap(); + let mut tree: PBTree = PBTree::new(&pal).unwrap(); + assert!(tree.get(&1).is_none()); + tree.insert(1, 1, &pal); + assert_eq!(tree.get(&1), Some(&1)); + } + + #[test] + fn seq_insert() { + let file = TestFile::new(); + let mut pal = Pal::create(file.path(), 32 * 1024 * 1024, 0o666).unwrap(); + let mut tree: PBTree = PBTree::new(&pal).unwrap(); + + for id in 0..128 { + tree.insert(id, id, &pal); + } + + for id in 0..128 { + assert_eq!(tree.get(&id), Some(&id)); + } + } - (left, median, right) + #[test] + fn rnd_insert() { + let file = TestFile::new(); + let mut pal = Pal::create(file.path(), 32 * 1024 * 1024, 0o666).unwrap(); + let mut tree = PBTree::new(&pal).unwrap(); + + use rand::Rng; + let mut rng = rand::thread_rng(); + let vals = [0u32; 128].map(|_| rng.gen::()); + let set = HashSet::from(vals); + + for id in set.iter() { + tree.insert(id, id, &pal); + } + for id in set.iter() { + assert_eq!(tree.get(&id), Some(&id)); + } } }