From 52c8633feeeac4103f4ce8848ba0ac101de35b5c Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 1 Oct 2024 16:44:27 +0200 Subject: [PATCH 01/19] use byteview as slice type --- src/slice.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/slice.rs b/src/slice.rs index 0020988..c26ace7 100644 --- a/src/slice.rs +++ b/src/slice.rs @@ -2,12 +2,12 @@ // This source code is licensed under both the Apache 2.0 and MIT License // (found in the LICENSE-* files in the repository) -use std::hash::Hash; -use std::sync::Arc; +use byteview::ByteView; +use std::{hash::Hash, sync::Arc}; /// An immutable byte slice that can be cloned without additional heap allocation #[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] -pub struct Slice(Arc<[u8]>); +pub struct Slice(ByteView); impl Slice { /// Construct a [`Slice`] from a byte slice. @@ -15,6 +15,12 @@ impl Slice { pub fn new(bytes: &[u8]) -> Self { Self::from(bytes) } + + #[doc(hidden)] + #[must_use] + pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { + Self(self.0.slice(range)) + } } impl std::borrow::Borrow<[u8]> for Slice { @@ -78,7 +84,7 @@ impl From<&[u8]> for Slice { impl From> for Slice { fn from(value: Arc<[u8]>) -> Self { - Self(value) + Self(ByteView::from(value)) } } From 7b5ccf579a34cc759958eaef146ccd1caf075a3b Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 1 Oct 2024 16:47:44 +0200 Subject: [PATCH 02/19] wip --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 3dbd56b..30e6b96 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ serde = ["dep:serde"] [dependencies] byteorder = "1.5.0" +byteview = { path = "../thin-slice" } log = "0.4.22" min-max-heap = "1.3.0" path-absolutize = "3.1.1" From a4b86ba2f79bb111af100339caa96bf41145fcf4 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sun, 27 Oct 2024 23:56:54 +0100 Subject: [PATCH 03/19] wip --- src/slice.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/slice.rs b/src/slice.rs index c26ace7..3721217 100644 --- a/src/slice.rs +++ b/src/slice.rs @@ -16,6 +16,22 @@ impl Slice { Self::from(bytes) } + #[doc(hidden)] + #[must_use] + pub fn with_size(len: usize) -> Self { + Self(ByteView::with_size(len)) + } + + // TODO: get_mut, update_prefix should probably be unsafe + #[doc(hidden)] + pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { + let mut view = Self::with_size(len); + let builder = view.0.get_mut().expect("we are the owner"); + reader.read_exact(builder)?; + view.0.update_prefix(); + Ok(view) + } + #[doc(hidden)] #[must_use] pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { From 5be184e06c8e8c898e8d81ab5df00f44bb6a02db Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 4 Dec 2024 19:23:56 +0100 Subject: [PATCH 04/19] wip --- src/segment/reader.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/segment/reader.rs b/src/segment/reader.rs index 7bfeb90..e98e23a 100644 --- a/src/segment/reader.rs +++ b/src/segment/reader.rs @@ -99,6 +99,7 @@ impl Iterator for Reader { } }; + // TODO: optimize using Slice::from_reader let mut key = vec![0; key_len.into()]; if let Err(e) = self.inner.read_exact(&mut key) { return Some(Err(e.into())); @@ -114,6 +115,7 @@ impl Iterator for Reader { } }; + // TODO: optimize using Slice::from_reader let mut val = vec![0; val_len as usize]; if let Err(e) = self.inner.read_exact(&mut val) { return Some(Err(e.into())); From ab7c10b761c003758deb326e92cd15c5a399b11b Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sun, 12 Jan 2025 19:46:08 +0100 Subject: [PATCH 05/19] use ByteView::from_reader --- src/slice/slice_arc.rs | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/slice/slice_arc.rs b/src/slice/slice_arc.rs index 03b8e34..cf903b9 100644 --- a/src/slice/slice_arc.rs +++ b/src/slice/slice_arc.rs @@ -24,16 +24,8 @@ impl Slice { #[doc(hidden)] pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { - use std::ops::DerefMut; - - // TODO: impl from_reader in Byteview, can skip get_mut in "constructor" - let mut view = Self::with_size(len); - { - let mut builder = view.0.get_mut().expect("we are the owner"); - reader.read_exact(builder.deref_mut())?; - } - - Ok(view) + let view = ByteView::from_reader(reader, len)?; + Ok(Self(view)) } } From 58da0754a9abfc49c89c8143fd67c37c9250821d Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 21 Jan 2025 21:40:05 +0100 Subject: [PATCH 06/19] add Slice::slice --- Cargo.toml | 2 +- src/slice/slice_arc.rs | 6 ++++++ src/slice/slice_bytes.rs | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b52f2c9..e01b6d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "value-log" description = "Value log implementation for key-value separated LSM storage" license = "MIT OR Apache-2.0" -version = "1.4.1" +version = "1.5.0" edition = "2021" rust-version = "1.74.0" readme = "README.md" diff --git a/src/slice/slice_arc.rs b/src/slice/slice_arc.rs index cf903b9..f764f89 100644 --- a/src/slice/slice_arc.rs +++ b/src/slice/slice_arc.rs @@ -16,6 +16,12 @@ impl Slice { Self(bytes.into()) } + #[doc(hidden)] + #[must_use] + pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { + Self(self.0.slice(range)) + } + #[must_use] #[doc(hidden)] pub fn with_size(len: usize) -> Self { diff --git a/src/slice/slice_bytes.rs b/src/slice/slice_bytes.rs index 9d7c608..34bb521 100644 --- a/src/slice/slice_bytes.rs +++ b/src/slice/slice_bytes.rs @@ -16,6 +16,12 @@ impl Slice { Self(Bytes::copy_from_slice(bytes)) } + #[doc(hidden)] + #[must_use] + pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { + Self(self.0.slice(range)) + } + #[doc(hidden)] pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { let mut builder = BytesMut::zeroed(len); From 12a04cba9d7bfc410408822610e6b80e169b9548 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Tue, 21 Jan 2025 21:40:05 +0100 Subject: [PATCH 07/19] add Slice::slice --- Cargo.toml | 4 ++-- src/slice/slice_arc.rs | 6 ++++++ src/slice/slice_bytes.rs | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b52f2c9..8f9b1c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "value-log" description = "Value log implementation for key-value separated LSM storage" license = "MIT OR Apache-2.0" -version = "1.4.1" +version = "1.5.0" edition = "2021" rust-version = "1.74.0" readme = "README.md" @@ -24,7 +24,7 @@ bytes = ["dep:bytes"] [dependencies] bytes = { version = "1.8.0", optional = true } byteorder = "1.5.0" -byteview = { path = "../thin-slice" } +byteview = "0.3.0" log = "0.4.22" min-max-heap = "1.3.0" path-absolutize = "3.1.1" diff --git a/src/slice/slice_arc.rs b/src/slice/slice_arc.rs index cf903b9..f764f89 100644 --- a/src/slice/slice_arc.rs +++ b/src/slice/slice_arc.rs @@ -16,6 +16,12 @@ impl Slice { Self(bytes.into()) } + #[doc(hidden)] + #[must_use] + pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { + Self(self.0.slice(range)) + } + #[must_use] #[doc(hidden)] pub fn with_size(len: usize) -> Self { diff --git a/src/slice/slice_bytes.rs b/src/slice/slice_bytes.rs index 9d7c608..34bb521 100644 --- a/src/slice/slice_bytes.rs +++ b/src/slice/slice_bytes.rs @@ -16,6 +16,12 @@ impl Slice { Self(Bytes::copy_from_slice(bytes)) } + #[doc(hidden)] + #[must_use] + pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { + Self(self.0.slice(range)) + } + #[doc(hidden)] pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { let mut builder = BytesMut::zeroed(len); From c0f2cae6f33448868b3b38d1658143c201731921 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 21:39:36 +0100 Subject: [PATCH 08/19] remove comments --- src/segment/reader.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/segment/reader.rs b/src/segment/reader.rs index e98e23a..7bfeb90 100644 --- a/src/segment/reader.rs +++ b/src/segment/reader.rs @@ -99,7 +99,6 @@ impl Iterator for Reader { } }; - // TODO: optimize using Slice::from_reader let mut key = vec![0; key_len.into()]; if let Err(e) = self.inner.read_exact(&mut key) { return Some(Err(e.into())); @@ -115,7 +114,6 @@ impl Iterator for Reader { } }; - // TODO: optimize using Slice::from_reader let mut val = vec![0; val_len as usize]; if let Err(e) = self.inner.read_exact(&mut val) { return Some(Err(e.into())); From 82ab3f54056fc867fc57577abcd3ab470e95c954 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 22:17:35 +0100 Subject: [PATCH 09/19] fix: bytes from impl --- src/slice.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/slice.rs b/src/slice.rs index 928d84c..3b5c244 100644 --- a/src/slice.rs +++ b/src/slice.rs @@ -26,7 +26,15 @@ impl AsRef<[u8]> for Slice { impl From<&[u8]> for Slice { fn from(value: &[u8]) -> Self { - Self(value.into()) + #[cfg(not(feature = "bytes"))] + { + Self(byteview::ByteView::new(value)) + } + + #[cfg(feature = "bytes")] + { + Self(bytes::Bytes::from(value.to_vec())) + } } } From bde4dc3df43bf7290e480e61715ab75081902d2a Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 22:22:18 +0100 Subject: [PATCH 10/19] use interval heap --- Cargo.toml | 2 +- src/segment/merge.rs | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8f9b1c2..51e2a58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,8 +25,8 @@ bytes = ["dep:bytes"] bytes = { version = "1.8.0", optional = true } byteorder = "1.5.0" byteview = "0.3.0" +interval-heap = "0.0.5" log = "0.4.22" -min-max-heap = "1.3.0" path-absolutize = "3.1.1" quick_cache = { version = "0.6.5", default-features = false } rustc-hash = "2.0.0" diff --git a/src/segment/merge.rs b/src/segment/merge.rs index 587042c..af37217 100644 --- a/src/segment/merge.rs +++ b/src/segment/merge.rs @@ -3,11 +3,9 @@ // (found in the LICENSE-* files in the repository) use crate::{id::SegmentId, value::UserKey, Compressor, SegmentReader, UserValue}; +use interval_heap::IntervalHeap; use std::cmp::Reverse; -// TODO: replace with MinHeap... -use min_max_heap::MinMaxHeap; - type IteratorIndex = usize; #[derive(Debug)] @@ -42,16 +40,14 @@ impl Ord for IteratorValue { #[allow(clippy::module_name_repetitions)] pub struct MergeReader { readers: Vec>, - heap: MinMaxHeap, + heap: IntervalHeap, } impl MergeReader { /// Initializes a new merging reader pub fn new(readers: Vec>) -> Self { - Self { - readers, - heap: MinMaxHeap::new(), - } + let heap = IntervalHeap::with_capacity(readers.len()); + Self { readers, heap } } fn advance_reader(&mut self, idx: usize) -> crate::Result<()> { From f73c8b0a5cc91a7e6917020ba19937d2ad468d0d Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 22:22:26 +0100 Subject: [PATCH 11/19] relax bytes semver --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 51e2a58..0db65d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ serde = ["dep:serde"] bytes = ["dep:bytes"] [dependencies] -bytes = { version = "1.8.0", optional = true } +bytes = { version = "1", optional = true } byteorder = "1.5.0" byteview = "0.3.0" interval-heap = "0.0.5" From c968c0f770a1320bef18296451e380f2d19f874c Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 22:30:23 +0100 Subject: [PATCH 12/19] perf: refactor segment reader skip heap allocation for no compression --- src/segment/reader.rs | 70 ++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/src/segment/reader.rs b/src/segment/reader.rs index 7bfeb90..51f2dd0 100644 --- a/src/segment/reader.rs +++ b/src/segment/reader.rs @@ -3,7 +3,7 @@ // (found in the LICENSE-* files in the repository) use super::{meta::METADATA_HEADER_MAGIC, writer::BLOB_HEADER_MAGIC}; -use crate::{coding::DecodeError, id::SegmentId, value::UserKey, Compressor, UserValue}; +use crate::{coding::DecodeError, id::SegmentId, value::UserKey, Compressor, Slice, UserValue}; use byteorder::{BigEndian, ReadBytesExt}; use std::{ fs::File, @@ -11,6 +11,15 @@ use std::{ path::Path, }; +macro_rules! fail_iter { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => return Some(Err(e.into())), + } + }; +} + /// Reads through a segment in order. pub struct Reader { pub(crate) segment_id: SegmentId, @@ -79,54 +88,25 @@ impl Iterator for Reader { } } - let checksum = match self.inner.read_u64::() { - Ok(v) => v, - Err(e) => { - if e.kind() == std::io::ErrorKind::UnexpectedEof { - return None; - } - return Some(Err(e.into())); - } - }; + let checksum = fail_iter!(self.inner.read_u64::()); - let key_len = match self.inner.read_u16::() { - Ok(v) => v, - Err(e) => { - if e.kind() == std::io::ErrorKind::UnexpectedEof { - return None; - } - return Some(Err(e.into())); - } - }; - - let mut key = vec![0; key_len.into()]; - if let Err(e) = self.inner.read_exact(&mut key) { - return Some(Err(e.into())); - }; - - let val_len = match self.inner.read_u32::() { - Ok(v) => v, - Err(e) => { - if e.kind() == std::io::ErrorKind::UnexpectedEof { - return None; - } - return Some(Err(e.into())); - } - }; - - let mut val = vec![0; val_len as usize]; - if let Err(e) = self.inner.read_exact(&mut val) { - return Some(Err(e.into())); - }; + let key_len = fail_iter!(self.inner.read_u16::()); + let key = fail_iter!(Slice::from_reader(&mut self.inner, key_len as usize)); + let val_len = fail_iter!(self.inner.read_u32::()); let val = match &self.compression { - Some(compressor) => match compressor.decompress(&val) { - Ok(val) => val, - Err(e) => return Some(Err(e)), - }, - None => val, + Some(compressor) => { + let mut val = vec![0; val_len as usize]; + fail_iter!(self.inner.read_exact(&mut val)); + Slice::from(fail_iter!(compressor.decompress(&val))) + } + None => { + // NOTE: When not using compression, we can skip + // the intermediary heap allocation and read directly into a Slice + fail_iter!(Slice::from_reader(&mut self.inner, val_len as usize)) + } }; - Some(Ok((key.into(), val.into(), checksum))) + Some(Ok((key, val, checksum))) } } From c0a7fab96b2f5e5db18ab438807654a7be0f46de Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 22:30:48 +0100 Subject: [PATCH 13/19] refactor --- src/segment/reader.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/segment/reader.rs b/src/segment/reader.rs index 51f2dd0..5ba0a78 100644 --- a/src/segment/reader.rs +++ b/src/segment/reader.rs @@ -71,10 +71,7 @@ impl Iterator for Reader { { let mut buf = [0; BLOB_HEADER_MAGIC.len()]; - - if let Err(e) = self.inner.read_exact(&mut buf) { - return Some(Err(e.into())); - }; + fail_iter!(self.inner.read_exact(&mut buf)); if buf == METADATA_HEADER_MAGIC { self.is_terminated = true; From 025a02ad078ebec6420bd334183136721872f8a0 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Wed, 22 Jan 2025 22:31:44 +0100 Subject: [PATCH 14/19] refactor --- src/segment/merge.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/segment/merge.rs b/src/segment/merge.rs index af37217..5582a72 100644 --- a/src/segment/merge.rs +++ b/src/segment/merge.rs @@ -6,6 +6,15 @@ use crate::{id::SegmentId, value::UserKey, Compressor, SegmentReader, UserValue} use interval_heap::IntervalHeap; use std::cmp::Reverse; +macro_rules! fail_iter { + ($e:expr) => { + match $e { + Ok(v) => v, + Err(e) => return Some(Err(e.into())), + } + }; +} + type IteratorIndex = usize; #[derive(Debug)] @@ -83,22 +92,16 @@ impl Iterator for MergeReader { fn next(&mut self) -> Option { if self.heap.is_empty() { - if let Err(e) = self.push_next() { - return Some(Err(e)); - }; + fail_iter!(self.push_next()); } if let Some(head) = self.heap.pop_min() { - if let Err(e) = self.advance_reader(head.index) { - return Some(Err(e)); - } + fail_iter!(self.advance_reader(head.index)); // Discard old items while let Some(next) = self.heap.pop_min() { if next.key == head.key { - if let Err(e) = self.advance_reader(next.index) { - return Some(Err(e)); - } + fail_iter!(self.advance_reader(next.index)); } else { // Reached next user key now // Push back non-conflicting item and exit From 857d16d03ef592e0d73e6d90afd17b36f519f9b4 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Fri, 24 Jan 2025 19:16:14 +0100 Subject: [PATCH 15/19] wip --- src/slice.rs | 12 ++++++++++++ src/slice/slice_arc.rs | 7 +++++++ src/slice/slice_bytes.rs | 14 ++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/slice.rs b/src/slice.rs index 3b5c244..28a8c23 100644 --- a/src/slice.rs +++ b/src/slice.rs @@ -188,6 +188,7 @@ mod serde { mod tests { use super::Slice; use std::{fmt::Debug, sync::Arc}; + use test_log::test; fn assert_slice_handles(v: T) where @@ -200,6 +201,17 @@ mod tests { assert!(slice >= v, "slice_arc: {slice:?}, v: {v:?}"); } + #[test] + fn slice_empty() { + assert_eq!(Slice::empty(), []); + } + + #[test] + fn slice_with_size() { + assert_eq!(Slice::with_size(5), [0, 0, 0, 0, 0]); + assert_eq!(Slice::with_size(50), [0; 50]); + } + /// This test verifies that we can create a `Slice` from various types and compare a `Slice` with them. #[test] fn test_slice_instantiation() { diff --git a/src/slice/slice_arc.rs b/src/slice/slice_arc.rs index f764f89..e11bd1b 100644 --- a/src/slice/slice_arc.rs +++ b/src/slice/slice_arc.rs @@ -16,6 +16,12 @@ impl Slice { Self(bytes.into()) } + #[doc(hidden)] + #[must_use] + pub fn empty() -> Self { + Self(ByteView::new(&[])) + } + #[doc(hidden)] #[must_use] pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { @@ -28,6 +34,7 @@ impl Slice { Self(ByteView::with_size(len)) } + /// Constructs a [`Slice`] from an I/O reader by pulling in `len` bytes. #[doc(hidden)] pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { let view = ByteView::from_reader(reader, len)?; diff --git a/src/slice/slice_bytes.rs b/src/slice/slice_bytes.rs index 34bb521..824c530 100644 --- a/src/slice/slice_bytes.rs +++ b/src/slice/slice_bytes.rs @@ -16,12 +16,26 @@ impl Slice { Self(Bytes::copy_from_slice(bytes)) } + #[doc(hidden)] + #[must_use] + pub fn empty() -> Self { + Self(Bytes::from_static(&[])) + } + #[doc(hidden)] #[must_use] pub fn slice(&self, range: impl std::ops::RangeBounds) -> Self { Self(self.0.slice(range)) } + #[must_use] + #[doc(hidden)] + pub fn with_size(len: usize) -> Self { + let bytes = vec![0; len]; + Self(Bytes::from(bytes)) + } + + /// Constructs a [`Slice`] from an I/O reader by pulling in `len` bytes. #[doc(hidden)] pub fn from_reader(reader: &mut R, len: usize) -> std::io::Result { let mut builder = BytesMut::zeroed(len); From 9b004499532b8e16abbd9765306e4f0b095d4dce Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sat, 25 Jan 2025 17:59:26 +0100 Subject: [PATCH 16/19] refactor --- src/slice.rs | 12 +++++++++--- src/slice/slice_bytes.rs | 8 -------- src/slice/{slice_arc.rs => slice_default.rs} | 12 ++---------- 3 files changed, 11 insertions(+), 21 deletions(-) rename src/slice/{slice_arc.rs => slice_default.rs} (86%) diff --git a/src/slice.rs b/src/slice.rs index 28a8c23..9d150f4 100644 --- a/src/slice.rs +++ b/src/slice.rs @@ -3,7 +3,7 @@ // (found in the LICENSE-* files in the repository) #[cfg(not(feature = "bytes"))] -mod slice_arc; +mod slice_default; #[cfg(feature = "bytes")] mod slice_bytes; @@ -13,10 +13,10 @@ use std::{ sync::Arc, }; -#[cfg(not(feature = "bytes"))] -pub use slice_arc::Slice; #[cfg(feature = "bytes")] pub use slice_bytes::Slice; +#[cfg(not(feature = "bytes"))] +pub use slice_default::Slice; impl AsRef<[u8]> for Slice { fn as_ref(&self) -> &[u8] { @@ -38,6 +38,12 @@ impl From<&[u8]> for Slice { } } +impl From> for Slice { + fn from(value: Arc<[u8]>) -> Self { + Self::from(&*value) + } +} + impl From<&Vec> for Slice { fn from(value: &Vec) -> Self { Self::from(value.as_slice()) diff --git a/src/slice/slice_bytes.rs b/src/slice/slice_bytes.rs index 824c530..fb4b01e 100644 --- a/src/slice/slice_bytes.rs +++ b/src/slice/slice_bytes.rs @@ -3,7 +3,6 @@ // (found in the LICENSE-* files in the repository) use bytes::{Bytes, BytesMut}; -use std::sync::Arc; /// An immutable byte slice that can be cloned without additional heap allocation #[derive(Debug, Clone, Eq, Hash, Ord)] @@ -69,10 +68,3 @@ impl From for Slice { Self(Bytes::from(value)) } } - -// Needed because slice_arc specializes this impl -impl From> for Slice { - fn from(value: Arc<[u8]>) -> Self { - Self::new(value.as_ref()) - } -} diff --git a/src/slice/slice_arc.rs b/src/slice/slice_default.rs similarity index 86% rename from src/slice/slice_arc.rs rename to src/slice/slice_default.rs index e11bd1b..e672372 100644 --- a/src/slice/slice_arc.rs +++ b/src/slice/slice_default.rs @@ -3,7 +3,6 @@ // (found in the LICENSE-* files in the repository) use byteview::ByteView; -use std::sync::Arc; /// An immutable byte slice that can be cloned without additional heap allocation #[derive(Debug, Clone, Eq, Hash, Ord)] @@ -42,23 +41,16 @@ impl Slice { } } -// Arc::from> is specialized +// Arc::from> is specialized impl From> for Slice { fn from(value: Vec) -> Self { Self(ByteView::from(value)) } } -// Arc::from> is specialized +// Arc::from> is specialized impl From for Slice { fn from(value: String) -> Self { Self(ByteView::from(value.into_bytes())) } } - -// direct conversion -impl From> for Slice { - fn from(value: Arc<[u8]>) -> Self { - Self::from(&*value) - } -} From 1607f37aeb9d3680d04e77be291ac54905a81f87 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sun, 26 Jan 2025 16:59:09 +0100 Subject: [PATCH 17/19] comment --- src/segment/reader.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/segment/reader.rs b/src/segment/reader.rs index 5ba0a78..42d11df 100644 --- a/src/segment/reader.rs +++ b/src/segment/reader.rs @@ -93,6 +93,7 @@ impl Iterator for Reader { let val_len = fail_iter!(self.inner.read_u32::()); let val = match &self.compression { Some(compressor) => { + // TODO: https://github.com/PSeitz/lz4_flex/issues/166 let mut val = vec![0; val_len as usize]; fail_iter!(self.inner.read_exact(&mut val)); Slice::from(fail_iter!(compressor.decompress(&val))) From 04aea1a9779f160c6e77b84a2996de0738a7d92f Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sun, 26 Jan 2025 19:34:52 +0100 Subject: [PATCH 18/19] update byteview --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0db65d4..eb957c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ bytes = ["dep:bytes"] [dependencies] bytes = { version = "1", optional = true } byteorder = "1.5.0" -byteview = "0.3.0" +byteview = "0.4.0" interval-heap = "0.0.5" log = "0.4.22" path-absolutize = "3.1.1" From 02abbcf468d4ed154869693feea5b0c1f4e3eae1 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Thu, 30 Jan 2025 18:24:30 +0100 Subject: [PATCH 19/19] doc --- src/slice/slice_bytes.rs | 2 ++ src/slice/slice_default.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/slice/slice_bytes.rs b/src/slice/slice_bytes.rs index fb4b01e..80db6d6 100644 --- a/src/slice/slice_bytes.rs +++ b/src/slice/slice_bytes.rs @@ -5,6 +5,8 @@ use bytes::{Bytes, BytesMut}; /// An immutable byte slice that can be cloned without additional heap allocation +/// +/// There is no guarantee of any sort of alignment for zero-copy (de)serialization. #[derive(Debug, Clone, Eq, Hash, Ord)] pub struct Slice(pub(super) Bytes); diff --git a/src/slice/slice_default.rs b/src/slice/slice_default.rs index e672372..1b3911d 100644 --- a/src/slice/slice_default.rs +++ b/src/slice/slice_default.rs @@ -5,6 +5,8 @@ use byteview::ByteView; /// An immutable byte slice that can be cloned without additional heap allocation +/// +/// There is no guarantee of any sort of alignment for zero-copy (de)serialization. #[derive(Debug, Clone, Eq, Hash, Ord)] pub struct Slice(pub(super) ByteView);