awslabs · vladem · Nov 6, 2024 · Nov 5, 2024 · Nov 6, 2024 · passaro
diff --git a/mountpoint-s3/src/cli.rs b/mountpoint-s3/src/cli.rs
@@ -309,7 +309,7 @@ pub struct CliArgs {
     #[cfg(feature = "block_size")]
     #[clap(
         long,
-        help = "Size of a cache block in KiB [Default: 1024 (1 MiB) for disk cache and for S3 Express cache]",
+        help = "Size of a cache block in KiB [Default: 1024 (1 MiB)]",
         help_heading = CACHING_OPTIONS_HEADER,
         value_name = "KiB",
         requires = "cache_group",

diff --git a/mountpoint-s3/src/data_cache/express_data_cache.rs b/mountpoint-s3/src/data_cache/express_data_cache.rs
@@ -78,6 +78,8 @@ where
         };
 
         pin_mut!(result);
+        // Guarantee that the request will start even in case of `initial_read_window == 0`.
+        result.as_mut().increment_read_window(self.block_size as usize);
 
         // TODO: optimize for the common case of a single chunk.
         let mut buffer = BytesMut::default();
@@ -89,6 +91,7 @@ where
                     }
                     buffer.extend_from_slice(&body);
 
+                    // Ensure the flow-control window is large enough.
                     result.as_mut().increment_read_window(self.block_size as usize);
                 }
                 Err(ObjectClientError::ServiceError(GetObjectError::NoSuchKey)) => return Ok(None),

diff --git a/mountpoint-s3/src/data_cache/multilevel_cache.rs b/mountpoint-s3/src/data_cache/multilevel_cache.rs
@@ -9,9 +9,6 @@ use crate::object::ObjectId;
 use super::{BlockIndex, ChecksummedBytes, DataCache, DataCacheResult};
 
 /// A data cache which uses both the local disk and S3 Express One Zone bucket as a storage.
-/// Disk cache is assumed to be faster so this is quiried first on `get_block` requests. An
-/// S3 Express One Zone cache is checked when data is missing on disk. Both caches are
-/// populated on `put_block`.
 pub struct MultilevelDataCache<DiskCache, ExpressCache, Runtime> {
     disk_cache: Arc<DiskCache>,
     express_cache: ExpressCache,
@@ -21,11 +18,9 @@ pub struct MultilevelDataCache<DiskCache, ExpressCache, Runtime> {
 impl<DiskCache: DataCache, ExpressCache: DataCache, Runtime: Spawn>
     MultilevelDataCache<DiskCache, ExpressCache, Runtime>
 {
+    /// Both the `disk_cache` and `express_cache` must be configured with the same `block_size`.
     pub fn new(disk_cache: Arc<DiskCache>, express_cache: ExpressCache, runtime: Runtime) -> Self {
-        // Method `MultilevelDataCache::block_size` relies on block sizes of both caches to be equal.
-        // `CachingPartStream`, being the user of cache, uses this method to split S3 object into blocks.
-        // Allowing non-matching block sizes would mean splitting objects in 2 different ways and imply
-        // the different interface for the `MultilevelDataCache`.
+        // The same blocks are written to both caches, so their block sizes must match.
         assert_eq!(
             disk_cache.block_size(),
             express_cache.block_size(),
@@ -46,6 +41,7 @@ where
     ExpressCache: DataCache + Sync,
     Runtime: Spawn + Sync,
 {
+    /// Gets a block from one of the underlying caches. Populates the disk cache with data fetched from the S3 Express cache.
     async fn get_block(
         &self,
         cache_key: &ObjectId,
@@ -82,6 +78,7 @@ where
         DataCacheResult::Ok(None)
     }
 
+    /// Puts a block into both caches.
     async fn put_block(
         &self,
         cache_key: ObjectId,