Skip to content

Commit

Permalink
read: allow use of ReadCache without std (#653)
Browse files Browse the repository at this point in the history
  • Loading branch information
philipc authored Mar 27, 2024
1 parent 981b874 commit ff176cc
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 30 deletions.
2 changes: 0 additions & 2 deletions src/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,7 @@ pub use crate::common::*;
mod read_ref;
pub use read_ref::*;

#[cfg(feature = "std")]
mod read_cache;
#[cfg(feature = "std")]
pub use read_cache::*;

mod util;
Expand Down
104 changes: 76 additions & 28 deletions src/read/read_cache.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::cell::RefCell;
use core::convert::TryInto;
use core::mem;
use core::ops::Range;
use std::boxed::Box;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::TryInto;
#[cfg(feature = "std")]
use std::io::{Read, Seek, SeekFrom};
use std::mem;
use std::vec::Vec;

#[cfg(not(feature = "std"))]
use alloc::collections::btree_map::{BTreeMap as Map, Entry};
#[cfg(feature = "std")]
use std::collections::hash_map::{Entry, HashMap as Map};

use crate::read::ReadRef;

Expand All @@ -24,19 +28,19 @@ use crate::read::ReadRef;
/// Note that malformed files can cause the cache to grow much larger than
/// the file size.
#[derive(Debug)]
pub struct ReadCache<R: Read + Seek> {
pub struct ReadCache<R: ReadCacheOps> {
cache: RefCell<ReadCacheInternal<R>>,
}

#[derive(Debug)]
struct ReadCacheInternal<R: Read + Seek> {
struct ReadCacheInternal<R: ReadCacheOps> {
read: R,
bufs: HashMap<(u64, u64), Box<[u8]>>,
strings: HashMap<(u64, u8), Box<[u8]>>,
bufs: Map<(u64, u64), Box<[u8]>>,
strings: Map<(u64, u8), Box<[u8]>>,
len: Option<u64>,
}

impl<R: Read + Seek> ReadCacheInternal<R> {
impl<R: ReadCacheOps> ReadCacheInternal<R> {
/// Ensures this range is contained in the len of the file
fn range_in_bounds(&mut self, range: &Range<u64>) -> Result<(), ()> {
if range.start <= range.end && range.end <= self.len()? {
Expand All @@ -51,22 +55,22 @@ impl<R: Read + Seek> ReadCacheInternal<R> {
match self.len {
Some(len) => Ok(len),
None => {
let len = self.read.seek(SeekFrom::End(0)).map_err(|_| ())?;
let len = self.read.len()?;
self.len = Some(len);
Ok(len)
}
}
}
}

impl<R: Read + Seek> ReadCache<R> {
impl<R: ReadCacheOps> ReadCache<R> {
/// Create an empty `ReadCache` for the given stream.
pub fn new(read: R) -> Self {
ReadCache {
cache: RefCell::new(ReadCacheInternal {
read,
bufs: HashMap::new(),
strings: HashMap::new(),
bufs: Map::new(),
strings: Map::new(),
len: None,
}),
}
Expand All @@ -93,7 +97,7 @@ impl<R: Read + Seek> ReadCache<R> {
}
}

impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
impl<'a, R: ReadCacheOps> ReadRef<'a> for &'a ReadCache<R> {
fn len(self) -> Result<u64, ()> {
self.cache.borrow_mut().len()
}
Expand All @@ -108,12 +112,12 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => {
let size = size.try_into().map_err(|_| ())?;
cache.read.seek(SeekFrom::Start(offset)).map_err(|_| ())?;
cache.read.seek(offset)?;
let mut bytes = Vec::new();
bytes.try_reserve_exact(size).map_err(|_| ())?;
bytes.resize(size, 0);
let mut bytes = bytes.into_boxed_slice();
cache.read.read_exact(&mut bytes).map_err(|_| ())?;
cache.read.read_exact(&mut bytes)?;
entry.insert(bytes)
}
};
Expand All @@ -128,10 +132,7 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
let buf = match cache.strings.entry((range.start, delimiter)) {
Entry::Occupied(entry) => entry.into_mut(),
Entry::Vacant(entry) => {
cache
.read
.seek(SeekFrom::Start(range.start))
.map_err(|_| ())?;
cache.read.seek(range.start)?;

let max_check: usize = (range.end - range.start).try_into().map_err(|_| ())?;
// Strings should be relatively small.
Expand All @@ -142,7 +143,7 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
let mut checked = 0;
loop {
bytes.resize((checked + 256).min(max_check), 0);
let read = cache.read.read(&mut bytes[checked..]).map_err(|_| ())?;
let read = cache.read.read(&mut bytes[checked..])?;
if read == 0 {
return Err(());
}
Expand All @@ -168,21 +169,21 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
///
/// Shares an underlying [`ReadCache`] with a lifetime of `'a`.
#[derive(Debug)]
pub struct ReadCacheRange<'a, R: Read + Seek> {
pub struct ReadCacheRange<'a, R: ReadCacheOps> {
r: &'a ReadCache<R>,
offset: u64,
size: u64,
}

impl<'a, R: Read + Seek> Clone for ReadCacheRange<'a, R> {
impl<'a, R: ReadCacheOps> Clone for ReadCacheRange<'a, R> {
fn clone(&self) -> Self {
*self
}
}

impl<'a, R: Read + Seek> Copy for ReadCacheRange<'a, R> {}
impl<'a, R: ReadCacheOps> Copy for ReadCacheRange<'a, R> {}

impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> {
impl<'a, R: ReadCacheOps> ReadRef<'a> for ReadCacheRange<'a, R> {
fn len(self) -> Result<u64, ()> {
Ok(self.size)
}
Expand Down Expand Up @@ -211,3 +212,50 @@ impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> {
Ok(bytes)
}
}

/// Operations required to implement [`ReadCache`].
///
/// This is a subset of the `Read` and `Seek` traits from `std::io`, with
/// signatures that use only primitive types and byte slices, so the trait
/// itself does not depend on `std`.
/// When the `std` feature is enabled, a blanket implementation is provided
/// for all types that implement `Read + Seek`.
///
/// Every method reports failure as `Err(())`; no error detail is carried.
// `len` here is a fallible query on a stream that needs `&mut self`, not a
// collection length, so no `is_empty` companion is provided.
#[allow(clippy::len_without_is_empty)]
pub trait ReadCacheOps {
/// Return the length of the stream.
///
/// Equivalent to `std::io::Seek::seek(SeekFrom::End(0))`.
///
/// Because of that equivalence, an implementation may leave the stream
/// positioned at its end; callers should `seek` before reading again.
///
/// # Errors
///
/// Returns `Err(())` if the length cannot be determined.
fn len(&mut self) -> Result<u64, ()>;

/// Seek to the given position in the stream.
///
/// Equivalent to `std::io::Seek::seek` with `SeekFrom::Start(pos)`.
///
/// `pos` is an absolute offset from the start of the stream. The `Ok`
/// value mirrors the return value of `std::io::Seek::seek`.
///
/// # Errors
///
/// Returns `Err(())` if the seek fails.
fn seek(&mut self, pos: u64) -> Result<u64, ()>;

/// Read up to `buf.len()` bytes into `buf`.
///
/// Equivalent to `std::io::Read::read`.
///
/// Returns the number of bytes read. When reading delimited strings,
/// [`ReadCache`] treats a return value of `0` as a failure to find the
/// delimiter.
///
/// # Errors
///
/// Returns `Err(())` if the read fails.
fn read(&mut self, buf: &mut [u8]) -> Result<usize, ()>;

/// Read exactly `buf.len()` bytes into `buf`.
///
/// Equivalent to `std::io::Read::read_exact`.
///
/// # Errors
///
/// Returns `Err(())` if `buf` cannot be filled completely.
fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ()>;
}

#[cfg(feature = "std")]
impl<T: Read + Seek> ReadCacheOps for T {
    /// Reports the stream length by seeking to the end of the stream.
    ///
    /// The stream is left positioned at its end. Any I/O error is collapsed
    /// into `Err(())`.
    fn len(&mut self) -> Result<u64, ()> {
        // Fully qualified so that this is visibly `std::io::Seek::seek` and
        // not a recursive call to `ReadCacheOps::seek`.
        let end = Seek::seek(self, SeekFrom::End(0));
        end.map_err(|_| ())
    }

    /// Moves the stream to the absolute offset `pos` from its start.
    ///
    /// Returns the position reported by `std::io::Seek::seek`. Any I/O error
    /// is collapsed into `Err(())`.
    fn seek(&mut self, pos: u64) -> Result<u64, ()> {
        let target = SeekFrom::Start(pos);
        Seek::seek(self, target).map_err(|_| ())
    }

    /// Forwards to `std::io::Read::read`, returning the number of bytes read.
    ///
    /// Any I/O error is collapsed into `Err(())`.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, ()> {
        let count = Read::read(self, buf);
        count.map_err(|_| ())
    }

    /// Forwards to `std::io::Read::read_exact`, filling all of `buf`.
    ///
    /// Any I/O error, including a short read, is collapsed into `Err(())`.
    fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), ()> {
        let filled = Read::read_exact(self, buf);
        filled.map_err(|_| ())
    }
}

0 comments on commit ff176cc

Please sign in to comment.