Merge pull request #23 from earthstar-project/encodings

Encoding traits, Path + Entry impls
earthstar-project · Jul 20, 2024 · 06457bd · 06457bd
2 parents 0d7d23b + 61bb7ab
commit 06457bd
Show file tree

Hide file tree

Showing 35 changed files with 2,133 additions and 138 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
 
-members = ["data-model", "fuzz"]
+members = ["data-model", "earthstar", "fuzz"]
 resolver = "2"
 
diff --git a/data-model/Cargo.toml b/data-model/Cargo.toml
@@ -3,8 +3,24 @@ name = "willow-data-model"
 version = "0.1.0"
 edition = "2021"
 
+[features]
+default = []
+dev = ["dep:arbitrary"]
+
 [dependencies]
-arbitrary = { version = "1.0.2", features = [
-    "derive",
-] } # TODO feature-gate this
+either = "1.10.0"
+arbitrary = { version = "1.0.2", features = ["derive"], optional = true }
+ufotofu = "0.2.0"
 bytes = "1.6.0"
+
+[dev-dependencies]
+smol = "2.0.0"
+
+# docs.rs-specific configuration
+[package.metadata.docs.rs]
+# document all features
+all-features = true
+
+
+
+
diff --git a/data-model/src/encoding/compact_width.rs b/data-model/src/encoding/compact_width.rs
@@ -0,0 +1,220 @@
+use core::error::Error;
+
+use crate::encoding::error::{DecodeError, EncodingConsumerError};
+use crate::encoding::parameters::Decoder;
+use crate::encoding::unsigned_int::{U16BE, U32BE, U64BE, U8BE};
+use ufotofu::local_nb::{BulkConsumer, BulkProducer};
+
+/// The minimum width in bytes needed to represent an unsigned integer.
+#[derive(PartialEq, Eq, Debug)]
+pub enum CompactWidth {
+    /// The byte-width required to represent numbers below 256 (i.e. an 8-bit number).
+    One,
+    /// The byte-width required to represent numbers below 256^2 (i.e. a 16-bit number).
+    Two,
+    /// The byte-width required to represent numbers below 256^4 (i.e. a 32-bit number).
+    Four,
+    /// The byte-width required to represent numbers below 256^8 (i.e. a 64-bit number).
+    Eight,
+}
+
+#[derive(Debug)]
+pub(crate) struct NotACompactWidthError(); // Returned by `CompactWidth::new` when the given byte count is not 1, 2, 4, or 8.
+
+impl CompactWidth {
+    /// Return the [`CompactWidth`] for a byte count of 1, 2, 4, or 8, or a `NotACompactWidthError` for any other `n`.
+    pub(crate) fn new(n: u8) -> Result<CompactWidth, NotACompactWidthError> {
+        match n {
+            1 => Ok(CompactWidth::One),
+            2 => Ok(CompactWidth::Two),
+            4 => Ok(CompactWidth::Four),
+            8 => Ok(CompactWidth::Eight),
+            _ => Err(NotACompactWidthError()),
+        }
+    }
+
+    /// Return the most compact width in bytes (1, 2, 4, or 8) needed to represent a given `u64` as a corresponding 8-bit, 16-bit, 32-bit, or 64-bit number.
+    ///
+    /// [Definition](https://willowprotocol.org/specs/encodings/index.html#compact_width).
+    pub fn from_u64(value: u64) -> Self {
+        if value <= u8::MAX as u64 {
+            CompactWidth::One
+        } else if value <= u16::MAX as u64 {
+            CompactWidth::Two
+        } else if value <= u32::MAX as u64 {
+            CompactWidth::Four
+        } else {
+            CompactWidth::Eight
+        }
+    }
+
+    /// Return the most compact width in bytes (1, 2, or 4) needed to represent a given `u32` as a corresponding 8-bit, 16-bit, or 32-bit number.
+    ///
+    /// [Definition](https://willowprotocol.org/specs/encodings/index.html#compact_width).
+    pub fn from_u32(value: u32) -> Self {
+        if value <= u8::MAX as u32 {
+            CompactWidth::One
+        } else if value <= u16::MAX as u32 {
+            CompactWidth::Two
+        } else {
+            CompactWidth::Four
+        }
+    }
+
+    /// Return the most compact width in bytes (1 or 2) needed to represent a given `u16` as a corresponding 8-bit or 16-bit number.
+    ///
+    /// [Definition](https://willowprotocol.org/specs/encodings/index.html#compact_width).
+    pub fn from_u16(value: u16) -> Self {
+        if value <= u8::MAX as u16 {
+            CompactWidth::One
+        } else {
+            CompactWidth::Two
+        }
+    }
+
+    /// Return [`CompactWidth::One`], the only [`CompactWidth`] needed to represent a given `u8`.
+    ///
+    /// [Definition](https://willowprotocol.org/specs/encodings/index.html#compact_width).
+    pub fn from_u8(_: u8) -> Self {
+        CompactWidth::One
+    }
+
+    /// Return the width in bytes (1, 2, 4, or 8) of this [`CompactWidth`].
+    pub fn width(&self) -> usize {
+        match self {
+            CompactWidth::One => 1,
+            CompactWidth::Two => 2,
+            CompactWidth::Four => 4,
+            CompactWidth::Eight => 8,
+        }
+    }
+}
+
+/// Encode a `u64` integer as a `compact_width(value)`-byte big-endian integer, and consume that with a [`BulkConsumer`]. Returns an [`EncodingConsumerError`] if the consumer fails.
+pub async fn encode_compact_width_be<Consumer: BulkConsumer<Item = u8>>(
+    value: u64,
+    consumer: &mut Consumer,
+) -> Result<(), EncodingConsumerError<Consumer::Error>>
+where
+    Consumer::Error: Error,
+{
+    let width = CompactWidth::from_u64(value).width(); // 1, 2, 4, or 8
+
+    consumer
+        .bulk_consume_full_slice(&value.to_be_bytes()[8 - width..]) // only the last `width` bytes of the 8-byte big-endian form; the skipped leading bytes are all zero
+        .await?;
+
+    Ok(())
+}
+
+/// Decode the bytes representing a big-endian integer of the given `compact_width` from a [`BulkProducer`] into a `u64`.
+pub async fn decode_compact_width_be<Producer: BulkProducer<Item = u8>>(
+    compact_width: CompactWidth,
+    producer: &mut Producer,
+) -> Result<u64, DecodeError<Producer::Error>> {
+    match compact_width {
+        CompactWidth::One => U8BE::decode(producer).await.map(u64::from), // each arm widens the decoded value to `u64`
+        CompactWidth::Two => U16BE::decode(producer).await.map(u64::from),
+        CompactWidth::Four => U32BE::decode(producer).await.map(u64::from),
+        CompactWidth::Eight => U64BE::decode(producer).await.map(u64::from),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ufotofu::local_nb::consumer::IntoVec;
+    use ufotofu::local_nb::producer::FromVec;
+
+    use super::*;
+
+    #[test]
+    fn compact_width_works() {
+        // u64: the smallest and largest value of each of the four widths
+        assert_eq!(CompactWidth::from_u64(0_u64), CompactWidth::One);
+        assert_eq!(CompactWidth::from_u64(u8::MAX as u64), CompactWidth::One);
+
+        assert_eq!(
+            CompactWidth::from_u64(u8::MAX as u64 + 1),
+            CompactWidth::Two
+        );
+        assert_eq!(CompactWidth::from_u64(u16::MAX as u64), CompactWidth::Two);
+
+        assert_eq!(
+            CompactWidth::from_u64(u16::MAX as u64 + 1),
+            CompactWidth::Four
+        );
+        assert_eq!(CompactWidth::from_u64(u32::MAX as u64), CompactWidth::Four);
+
+        assert_eq!(
+            CompactWidth::from_u64(u32::MAX as u64 + 1),
+            CompactWidth::Eight
+        );
+        assert_eq!(CompactWidth::from_u64(u64::MAX), CompactWidth::Eight);
+
+        // u32: the smallest and largest value of each of the three widths it can need
+        assert_eq!(CompactWidth::from_u32(0_u32), CompactWidth::One);
+        assert_eq!(CompactWidth::from_u32(u8::MAX as u32), CompactWidth::One);
+
+        assert_eq!(
+            CompactWidth::from_u32(u8::MAX as u32 + 1),
+            CompactWidth::Two
+        );
+        assert_eq!(CompactWidth::from_u32(u16::MAX as u32), CompactWidth::Two);
+
+        assert_eq!(
+            CompactWidth::from_u32(u16::MAX as u32 + 1),
+            CompactWidth::Four
+        );
+        assert_eq!(CompactWidth::from_u32(u32::MAX), CompactWidth::Four);
+
+        // u16: the smallest and largest value of each of the two widths it can need
+        assert_eq!(CompactWidth::from_u16(0_u16), CompactWidth::One);
+        assert_eq!(CompactWidth::from_u16(u8::MAX as u16), CompactWidth::One);
+
+        assert_eq!(
+            CompactWidth::from_u16(u8::MAX as u16 + 1),
+            CompactWidth::Two
+        );
+        assert_eq!(CompactWidth::from_u16(u16::MAX), CompactWidth::Two);
+
+        // u8: always a single byte, checked at both ends of the range
+        assert_eq!(CompactWidth::from_u8(0_u8), CompactWidth::One);
+        assert_eq!(CompactWidth::from_u8(u8::MAX), CompactWidth::One);
+    }
+
+    #[test]
+    fn encoding() {
+        let values = [ // (expected width, value): the smallest and largest value of each width
+            (CompactWidth::One, 0),
+            (CompactWidth::One, u8::MAX as u64),
+            (CompactWidth::Two, u8::MAX as u64 + 1),
+            (CompactWidth::Two, u16::MAX as u64),
+            (CompactWidth::Four, u16::MAX as u64 + 1),
+            (CompactWidth::Four, u32::MAX as u64),
+            (CompactWidth::Eight, u32::MAX as u64 + 1),
+            (CompactWidth::Eight, u64::MAX),
+        ];
+
+        smol::block_on(async {
+            for (compact_width, value) in values {
+                let mut consumer = IntoVec::<u8>::new();
+
+                encode_compact_width_be(value, &mut consumer).await.unwrap();
+
+                let encode_result = consumer.into_vec();
+
+                let decoded_compact_width = CompactWidth::new(encode_result.len() as u8).unwrap(); // the number of encoded bytes must itself be a valid width
+
+                assert_eq!(decoded_compact_width, compact_width);
+
+                let mut producer = FromVec::new(encode_result);
+
+                let decode_result = decode_compact_width_be(decoded_compact_width, &mut producer)
+                    .await
+                    .unwrap();
+
+                assert_eq!(decode_result, value); // round trip: decoding yields the value that was encoded
+            }
+        });
+    }
+}
diff --git a/data-model/src/encoding/error.rs b/data-model/src/encoding/error.rs
@@ -0,0 +1,96 @@
+use core::error::Error;
+use core::{fmt::Display, fmt::Formatter, num::TryFromIntError};
+use either::Either;
+use ufotofu::common::errors::{ConsumeFullSliceError, OverwriteFullSliceError};
+
+/// Returned when an encoding fails to be consumed by a [`ufotofu::local_nb::Consumer`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct EncodingConsumerError<E> {
+    /// The number of bytes which were consumed before the error.
+    pub bytes_consumed: usize,
+    /// The error returned on the final and failed attempt to consume bytes.
+    pub reason: E,
+}
+
+impl<E> From<ConsumeFullSliceError<E>> for EncodingConsumerError<E> { // lets `?` convert a `ConsumeFullSliceError` into an `EncodingConsumerError`
+    fn from(err: ConsumeFullSliceError<E>) -> Self {
+        EncodingConsumerError {
+            bytes_consumed: err.consumed, // both fields are carried over unchanged
+            reason: err.reason,
+        }
+    }
+}
+
+impl<E> Error for EncodingConsumerError<E>
+where
+    E: 'static + Error,
+{
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        Some(&self.reason) // the wrapped consumer error is always reported as the underlying cause
+    }
+}
+
+impl<E> Display for EncodingConsumerError<E> { // no bound on `E`: the message reports only the byte count, not `reason`
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        write!(
+            f,
+            "The consumer failed to consume after consuming {} bytes",
+            self.bytes_consumed
+        )
+    }
+}
+
+/// Everything that can go wrong when decoding a value.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum DecodeError<ProducerError> {
+    /// The producer of the bytes to be decoded returned an error, which is carried here.
+    Producer(ProducerError),
+    /// The bytes produced by the producer cannot be decoded into anything meaningful.
+    InvalidInput,
+    /// Tried to use a `u64` as a `usize` when the current target's `usize` is not big enough.
+    U64DoesNotFitUsize,
+}
+
+impl<F, E> From<OverwriteFullSliceError<F, E>> for DecodeError<E> {
+    fn from(value: OverwriteFullSliceError<F, E>) -> Self {
+        match value.reason {
+            Either::Left(_) => DecodeError::InvalidInput, // NOTE(review): presumably the producer's final item, i.e. the input ended before the slice was filled — confirm against ufotofu
+            Either::Right(err) => DecodeError::Producer(err), // the producer's own error is passed through unchanged
+        }
+    }
+}
+
+impl<ProducerError> From<TryFromIntError> for DecodeError<ProducerError> {
+    fn from(_: TryFromIntError) -> Self { // every failed integer conversion maps to this one variant, whatever integer types were involved
+        DecodeError::U64DoesNotFitUsize
+    }
+}
+
+impl<E> Error for DecodeError<E>
+where
+    E: 'static + Error,
+{
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            DecodeError::Producer(err) => Some(err), // only a producer failure has an underlying cause to report
+            DecodeError::InvalidInput => None,
+            DecodeError::U64DoesNotFitUsize => None,
+        }
+    }
+}
+
+impl<E> Display for DecodeError<E> { // no bound on `E`: the producer's error is never printed, only a fixed message per variant
+    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
+        match self {
+            DecodeError::Producer(_) => {
+                write!(f, "The underlying producer encountered an error",)
+            }
+            DecodeError::InvalidInput => {
+                write!(f, "Decoding failed due to receiving invalid input",)
+            }
+            DecodeError::U64DoesNotFitUsize => {
+                write!(f, "Tried (and failed) to decode a u64 to a 32-bit usize",) // NOTE(review): the message says "32-bit", but nothing here restricts the variant to 32-bit targets
+            }
+        }
+    }
+}