Skip to content

Commit 7cb2063

Browse files
authored
feat: improve estimated disk usage (#1676)
1 parent bea5b2f commit 7cb2063

File tree

3 files changed

+269
-126
lines changed

3 files changed

+269
-126
lines changed

crates/storage/src/versioned/id_indexed_v1/pruning_strategy.rs

+34 -18
Original file line number | Diff line number | Diff line change
@@ -109,12 +109,12 @@ impl PruningStrategy {
109109

110110
/// Returns `true` when used capacity is above target capacity.
111111
pub fn is_usage_above_target_capacity(&self, usage_stats: &UsageStats) -> bool {
112-
usage_stats.is_above(self.target_capacity_bytes())
112+
usage_stats.is_estimated_disk_usage_above(self.target_capacity_bytes())
113113
}
114114

115115
/// Returns `true` when used capacity is above storage capacity.
116116
pub fn should_prune(&self, usage_stats: &UsageStats) -> bool {
117-
usage_stats.is_above(self.config.storage_capacity_bytes)
117+
usage_stats.is_estimated_disk_usage_above(self.config.storage_capacity_bytes)
118118
}
119119

120120
/// Returns the number of entries to prune.
@@ -128,9 +128,9 @@ impl PruningStrategy {
128128
debug!(
129129
Db = %self.config.content_type,
130130
"Storage capacity is 0. Pruning everything ({})",
131-
usage_stats.entry_count
131+
usage_stats.entry_count(),
132132
);
133-
return usage_stats.entry_count;
133+
return usage_stats.entry_count();
134134
}
135135

136136
self.estimate_to_delete_until_target(usage_stats)
@@ -167,16 +167,17 @@ impl PruningStrategy {
167167
/// Returns the estimated number of items to delete to reach target capacity. It returns 0 if
168168
/// already below target capacity.
169169
fn estimate_to_delete_until_target(&self, usage_stats: &UsageStats) -> u64 {
170-
let Some(average_entry_size_bytes) = usage_stats.average_entry_size_bytes() else {
170+
let Some(average_entry_disk_usage_bytes) = usage_stats.average_entry_disk_usage_bytes()
171+
else {
171172
// Means that storage is empty and nothing can be deleted.
172173
return 0;
173174
};
174175

175176
// The estimated number of entries at the target capacity.
176177
let estimated_target_capacity_count =
177-
(self.target_capacity_bytes() as f64 / average_entry_size_bytes).floor() as u64;
178-
if usage_stats.entry_count > estimated_target_capacity_count {
179-
usage_stats.entry_count - estimated_target_capacity_count
178+
(self.target_capacity_bytes() as f64 / average_entry_disk_usage_bytes).floor() as u64;
179+
if usage_stats.entry_count() > estimated_target_capacity_count {
180+
usage_stats.entry_count() - estimated_target_capacity_count
180181
} else {
181182
0
182183
}
@@ -205,6 +206,7 @@ mod tests {
205206
use crate::{versioned::ContentType, DistanceFunction};
206207

207208
const DEFAULT_STORAGE_CAPACITY_BYTES: u64 = 1_000_000;
209+
const EXTRA_DISK_USAGE_PER_ENTRY: u64 = 100;
208210

209211
fn create_default_pruning_strategy() -> PruningStrategy {
210212
create_pruning_strategy(DEFAULT_STORAGE_CAPACITY_BYTES)
@@ -237,14 +239,18 @@ mod tests {
237239
#[case::above_full(110, 1_100_000, true)]
238240
fn is_usage_above_target_capacity(
239241
#[case] entry_count: u64,
240-
#[case] total_entry_size_bytes: u64,
242+
#[case] estimated_disk_usage: u64,
241243
#[case] expected: bool,
242244
) {
243245
let pruning_strategy = create_default_pruning_strategy();
244-
let usage_stats = UsageStats {
246+
247+
let total_entry_size_bytes =
248+
estimated_disk_usage - entry_count * EXTRA_DISK_USAGE_PER_ENTRY;
249+
let usage_stats = UsageStats::new(
245250
entry_count,
246251
total_entry_size_bytes,
247-
};
252+
EXTRA_DISK_USAGE_PER_ENTRY,
253+
);
248254

249255
assert_eq!(
250256
pruning_strategy.is_usage_above_target_capacity(&usage_stats),
@@ -264,15 +270,20 @@ mod tests {
264270
#[case::above_full_6(2000, 1_050_000, true, 100)]
265271
fn should_prune_and_pruning_count(
266272
#[case] entry_count: u64,
267-
#[case] total_entry_size_bytes: u64,
273+
#[case] estimated_disk_usage: u64,
268274
#[case] should_prune: bool,
269275
#[case] pruning_count: u64,
270276
) {
271277
let pruning_strategy = create_default_pruning_strategy();
272-
let usage_stats = UsageStats {
278+
279+
let total_entry_size_bytes =
280+
estimated_disk_usage - entry_count * EXTRA_DISK_USAGE_PER_ENTRY;
281+
let usage_stats = UsageStats::new(
273282
entry_count,
274283
total_entry_size_bytes,
275-
};
284+
EXTRA_DISK_USAGE_PER_ENTRY,
285+
);
286+
276287
assert_eq!(
277288
pruning_strategy.should_prune(&usage_stats),
278289
should_prune,
@@ -288,19 +299,24 @@ mod tests {
288299
#[rstest]
289300
#[case::empty(0, 0, false, false, 0)]
290301
#[case::few_entries(100, 20_000, true, true, 100)]
291-
#[case::many_entries(10_000, 1_000_000, true, true, 10_000)]
302+
#[case::many_entries(10_000, 2_000_000, true, true, 10_000)]
292303
fn zero_storage_capacity(
293304
#[case] entry_count: u64,
294-
#[case] total_entry_size_bytes: u64,
305+
#[case] estimated_disk_usage: u64,
295306
#[case] is_usage_above_target_capacity: bool,
296307
#[case] should_prune: bool,
297308
#[case] pruning_count: u64,
298309
) {
299310
let pruning_strategy = create_pruning_strategy(/* storage_capacity_bytes= */ 0);
300-
let usage_stats = UsageStats {
311+
312+
let total_entry_size_bytes =
313+
estimated_disk_usage - entry_count * EXTRA_DISK_USAGE_PER_ENTRY;
314+
let usage_stats = UsageStats::new(
301315
entry_count,
302316
total_entry_size_bytes,
303-
};
317+
EXTRA_DISK_USAGE_PER_ENTRY,
318+
);
319+
304320
assert_eq!(
305321
pruning_strategy.is_usage_above_target_capacity(&usage_stats),
306322
is_usage_above_target_capacity,

0 commit comments

Comments
 (0)