Skip to content

Commit 594f8e7

Browse files
committed
feat: poison
If the database encounters an I/O error during a commit, some intermediate, partial changes will be left in the database. It is possible to recover from them; however, in the interest of reducing complexity, we mark the database as poisoned and reject any further modification attempts. This lets NOMT avoid spreading the recovery logic throughout the codebase and instead concentrate it in the startup logic.
1 parent 1c4cb80 commit 594f8e7

File tree

3 files changed

+40
-9
lines changed

3 files changed

+40
-9
lines changed

nomt/src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,19 @@ impl<T: HashAlgorithm> Nomt<T> {
311311
self.store.sync_seqn()
312312
}
313313

314+
/// Whether the database is poisoned.
315+
///
316+
/// A database becomes poisoned when an error occurs during a commit operation.
317+
///
318+
/// From this point on, the database is in an inconsistent state and should be considered
319+
/// read-only. Any further modifying operations will return an error.
320+
///
321+
/// In order to recover from a poisoned database, the application should discard this instance
322+
/// and create a new one.
323+
pub fn is_poisoned(&self) -> bool {
324+
self.store.is_poisoned()
325+
}
326+
314327
/// Create a new [`Session`] object with the given parameters.
315328
///
316329
/// The [`Session`] is a read-only handle on the database and is used to create a changeset to

nomt/src/store/mod.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use nomt_core::{page_id::PageId, trie::KeyPath};
1717
use parking_lot::Mutex;
1818
use std::{
1919
fs::{File, OpenOptions},
20-
sync::Arc,
20+
sync::{atomic::AtomicBool, Arc},
2121
};
2222

2323
#[cfg(target_os = "linux")]
@@ -46,6 +46,7 @@ struct Shared {
4646
meta_fd: File,
4747
#[allow(unused)]
4848
flock: flock::Flock,
49+
poisoned: AtomicBool,
4950

5051
// Retained for the lifetime of the store.
5152
_db_dir_fd: Arc<File>,
@@ -199,10 +200,17 @@ impl Store {
199200
_db_dir_fd: db_dir_fd,
200201
meta_fd,
201202
flock,
203+
poisoned: false.into(),
202204
}),
203205
})
204206
}
205207

208+
/// Whether the store is poisoned.
///
/// Reads the shared `poisoned` flag with relaxed ordering. The flag is set when a sync attempt
/// returns an error; from then on, sync attempts bail out early with an error.
pub fn is_poisoned(&self) -> bool {
209+
self.shared
210+
.poisoned
211+
.load(std::sync::atomic::Ordering::Relaxed)
212+
}
213+
206214
/// Returns the current `sync_seqn` value, read while holding the `sync` lock.
pub fn sync_seqn(&self) -> u32 {
207215
self.sync.lock().sync_seqn
208216
}
@@ -278,16 +286,28 @@ impl Store {
278286
) -> anyhow::Result<()> {
279287
let mut sync = self.sync.lock();
280288

281-
sync.sync(
289+
if self
290+
.shared
291+
.poisoned
292+
.load(std::sync::atomic::Ordering::Relaxed)
293+
{
294+
anyhow::bail!("Store is poisoned due to prior error");
295+
}
296+
297+
if let Err(e) = sync.sync(
282298
&self.shared,
283299
value_tx,
284300
self.shared.pages.clone(),
285301
self.shared.values.clone(),
286302
self.shared.rollback.clone(),
287303
page_cache,
288304
updated_pages,
289-
)
290-
.unwrap();
305+
) {
306+
self.shared
307+
.poisoned
308+
.store(true, std::sync::atomic::Ordering::Relaxed);
309+
return Err(e);
310+
}
291311
Ok(())
292312
}
293313
}

nomt/src/store/sync.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,8 @@ impl Sync {
5151
rollback.begin_sync();
5252
}
5353

54-
// TODO: comprehensive error handling is coming later.
55-
bitbox_sync.wait_pre_meta().unwrap();
56-
let beatree_meta_wd = beatree_sync.wait_pre_meta().unwrap();
54+
bitbox_sync.wait_pre_meta()?;
55+
let beatree_meta_wd = beatree_sync.wait_pre_meta()?;
5756
let (rollback_start_live, rollback_end_live) = match rollback_sync {
5857
Some(ref rollback) => rollback.wait_pre_meta(),
5958
None => (0, 0),
@@ -90,8 +89,7 @@ impl Sync {
9089
beatree_sync.post_meta();
9190

9291
if let Some(ref rollback) = rollback_sync {
93-
// TODO: comprehensive error handling is coming later.
94-
rollback.wait_post_meta().unwrap();
92+
rollback.wait_post_meta()?;
9593
}
9694

9795
Ok(())

0 commit comments

Comments
 (0)