From ddffef3da7b081c4b4c500f723cf5704893f4cf3 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Mon, 2 Sep 2024 15:18:35 +0200 Subject: [PATCH 1/6] make `GILOnceCell` threadsafe --- newsfragments/4512.changed.md | 1 + src/sync.rs | 70 ++++++++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 newsfragments/4512.changed.md diff --git a/newsfragments/4512.changed.md b/newsfragments/4512.changed.md new file mode 100644 index 00000000000..1e86689c5ae --- /dev/null +++ b/newsfragments/4512.changed.md @@ -0,0 +1 @@ +`GILOnceCell` is now thread-safe for the Python 3.13 freethreaded builds. diff --git a/src/sync.rs b/src/sync.rs index c781755c067..8296c4aca97 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -8,7 +8,7 @@ use crate::{ types::{any::PyAnyMethods, PyString, PyType}, Bound, Py, PyResult, PyVisit, Python, }; -use std::cell::UnsafeCell; +use std::{cell::UnsafeCell, mem::MaybeUninit, sync::Once}; /// Value with concurrent access protected by the GIL. /// @@ -68,7 +68,7 @@ unsafe impl Sync for GILProtected where T: Send {} /// from reentrant initialization. /// 2) If the initialization function `f` provided to `get_or_init` (or `get_or_try_init`) /// temporarily releases the GIL (e.g. by calling `Python::import`) then it is possible -/// for a second thread to also begin initializing the `GITOnceCell`. Even when this +/// for a second thread to also begin initializing the `GILOnceCell`. Even when this /// happens `GILOnceCell` guarantees that only **one** write to the cell ever occurs - /// this is treated as a race, other threads will discard the value they compute and /// return the result of the first complete computation. @@ -92,8 +92,16 @@ unsafe impl Sync for GILProtected where T: Send {} /// } /// # Python::with_gil(|py| assert_eq!(get_shared_list(py).len(), 0)); /// ``` -#[derive(Default)] -pub struct GILOnceCell(UnsafeCell>); +pub struct GILOnceCell { + once: Once, + data: UnsafeCell>, +} + +impl Default for GILOnceCell { + fn default() -> Self { + Self::new() + } +} // T: Send is needed for Sync because the thread which drops the GILOnceCell can be different // to the thread which fills it. @@ -103,14 +111,21 @@ unsafe impl Send for GILOnceCell {} impl GILOnceCell { /// Create a `GILOnceCell` which does not yet contain a value. pub const fn new() -> Self { - Self(UnsafeCell::new(None)) + Self { + once: Once::new(), + data: UnsafeCell::new(MaybeUninit::uninit()), + } } /// Get a reference to the contained value, or `None` if the cell has not yet been written. #[inline] pub fn get(&self, _py: Python<'_>) -> Option<&T> { - // Safe because if the cell has not yet been written, None is returned. - unsafe { &*self.0.get() }.as_ref() + if self.once.is_completed() { + // SAFETY: the cell has been written. + Some(unsafe { (*self.data.get()).assume_init_ref() }) + } else { + None + } } /// Get a reference to the contained value, initializing it if needed using the provided @@ -164,7 +179,12 @@ impl GILOnceCell { /// Get the contents of the cell mutably. This is only possible if the reference to the cell is /// unique. pub fn get_mut(&mut self) -> Option<&mut T> { - self.0.get_mut().as_mut() + if self.once.is_completed() { + // SAFETY: the cell has been written. + Some(unsafe { (&mut *self.data.get()).assume_init_mut() }) + } else { + None + } } /// Set the value in the cell. @@ -172,28 +192,44 @@ impl GILOnceCell { /// If the cell has already been written, `Err(value)` will be returned containing the new /// value which was not written. pub fn set(&self, _py: Python<'_>, value: T) -> Result<(), T> { - // Safe because GIL is held, so no other thread can be writing to this cell concurrently. - let inner = unsafe { &mut *self.0.get() }; - if inner.is_some() { - return Err(value); - } + let mut value = Some(value); + // NB this can block, but only for the duration which the first thread to complete + // initialization is writing to the cell. We therefore don't need to worry about + // deadlocks with the GIL. + self.once.call_once_force(|_| { + // SAFETY: no other threads can be writing this value, because we are + // inside the `call_once_force` closure. + unsafe { + // `.take().unwrap()` will never panic + (&mut *self.data.get()).write(value.take().unwrap()); + } + }); - *inner = Some(value); - Ok(()) + match value { + // Some other thread wrote to the cell first + Some(value) => Err(value), + None => Ok(()), + } } /// Takes the value out of the cell, moving it back to an uninitialized state. /// /// Has no effect and returns None if the cell has not yet been written. pub fn take(&mut self) -> Option { - self.0.get_mut().take() + // We leave `self` in a default (empty) state + std::mem::take(self).into_inner() } /// Consumes the cell, returning the wrapped value. /// /// Returns None if the cell has not yet been written. pub fn into_inner(self) -> Option { - self.0.into_inner() + if self.once.is_completed() { + // SAFETY: the cell has been written. + Some(unsafe { self.data.into_inner().assume_init() }) + } else { + None + } } } From 7fcf6f27c2918fab51f4c0dad642d96ec354601b Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Fri, 27 Sep 2024 13:38:19 +0100 Subject: [PATCH 2/6] fix clippy --- src/sync.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index ae30e28725b..42dfb45f388 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -189,7 +189,7 @@ impl GILOnceCell { pub fn get_mut(&mut self) -> Option<&mut T> { if self.once.is_completed() { // SAFETY: the cell has been written. - Some(unsafe { (&mut *self.data.get()).assume_init_mut() }) + Some(unsafe { (*self.data.get()).assume_init_mut() }) } else { None } @@ -209,7 +209,7 @@ impl GILOnceCell { // inside the `call_once_force` closure. unsafe { // `.take().unwrap()` will never panic - (&mut *self.data.get()).write(value.take().unwrap()); + (*self.data.get()).write(value.take().unwrap()); } }); @@ -242,11 +242,18 @@ impl GILOnceCell { } impl GILOnceCell> { - /// Create a new cell that contains a new Python reference to the same contained object. + /// Creates a new cell that contains a new Python reference to the same contained object. /// - /// Returns an uninitialised cell if `self` has not yet been initialised. + /// Returns an uninitialized cell if `self` has not yet been initialized. pub fn clone_ref(&self, py: Python<'_>) -> Self { - Self(UnsafeCell::new(self.get(py).map(|ob| ob.clone_ref(py)))) + let cloned = Self { + once: Once::new(), + data: UnsafeCell::new(MaybeUninit::uninit()), + }; + if let Some(value) = self.get(py) { + let _ = cloned.set(py, value.clone_ref(py)); + } + cloned } } From fdaacf5d2cb3355e78e16cb5e266d6f6dd6eca23 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Fri, 27 Sep 2024 14:06:53 +0100 Subject: [PATCH 3/6] update documentation --- guide/src/faq.md | 12 ++++++------ guide/src/migration.md | 2 ++ src/sync.rs | 43 ++++++++++++++++++++++++++---------------- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/guide/src/faq.md b/guide/src/faq.md index bdccc6503cb..5752e14adbd 100644 --- a/guide/src/faq.md +++ b/guide/src/faq.md @@ -2,18 +2,18 @@ Sorry that you're having trouble using PyO3. If you can't find the answer to your problem in the list below, you can also reach out for help on [GitHub Discussions](https://github.com/PyO3/pyo3/discussions) and on [Discord](https://discord.gg/33kcChzH7f). -## I'm experiencing deadlocks using PyO3 with lazy_static or once_cell! +## I'm experiencing deadlocks using PyO3 with `std::sync::OnceLock`, `std::sync::LazyLock`, `lazy_static`, and `once_cell`! -`lazy_static` and `once_cell::sync` both use locks to ensure that initialization is performed only by a single thread. Because the Python GIL is an additional lock this can lead to deadlocks in the following way: +`OnceLock`, `LazyLock`, and their thirdparty predecessors use blocking to ensure only one thread ever initializes them. Because the Python GIL is an additional lock this can lead to deadlocks in the following way: -1. A thread (thread A) which has acquired the Python GIL starts initialization of a `lazy_static` value. +1. A thread (thread A) which has acquired the Python GIL starts initialization of a `OnceLock` value. 2. The initialization code calls some Python API which temporarily releases the GIL e.g. `Python::import`. -3. Another thread (thread B) acquires the Python GIL and attempts to access the same `lazy_static` value. -4. Thread B is blocked, because it waits for `lazy_static`'s initialization to lock to release. +3. Another thread (thread B) acquires the Python GIL and attempts to access the same `OnceLock` value. +4. Thread B is blocked, because it waits for `OnceLock`'s initialization to lock to release. 5. Thread A is blocked, because it waits to re-acquire the GIL which thread B still holds. 6. Deadlock. -PyO3 provides a struct [`GILOnceCell`] which works equivalently to `OnceCell` but relies solely on the Python GIL for thread safety. This means it can be used in place of `lazy_static` or `once_cell` where you are experiencing the deadlock described above. See the documentation for [`GILOnceCell`] for an example how to use it. +PyO3 provides a struct [`GILOnceCell`] which works similarly to these types but avoids risk of deadlocking with the Python GIL. This means it can be used in place of other choices when you are experiencing the deadlock described above. See the documentation for [`GILOnceCell`] for further details and an example how to use it. [`GILOnceCell`]: {{#PYO3_DOCS_URL}}/pyo3/sync/struct.GILOnceCell.html diff --git a/guide/src/migration.md b/guide/src/migration.md index fb212743702..42ba9696566 100644 --- a/guide/src/migration.md +++ b/guide/src/migration.md @@ -205,6 +205,8 @@ impl<'a, 'py> IntoPyObject<'py> for &'a MyPyObjectWrapper { PyO3 0.23 introduces preliminary support for the new free-threaded build of CPython 3.13. PyO3 features that implicitly assumed the existence of the GIL are not exposed in the free-threaded build, since they are no longer safe. +Other features, such as `GILOnceCell`, have been internally rewritten to be +threadsafe without the GIL. If you make use of these features then you will need to account for the unavailability of this API in the free-threaded build. One way to handle it is diff --git a/src/sync.rs b/src/sync.rs index 42dfb45f388..86c82f18e56 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -62,24 +62,34 @@ impl GILProtected { #[cfg(not(Py_GIL_DISABLED))] unsafe impl Sync for GILProtected where T: Send {} -/// A write-once cell similar to [`once_cell::OnceCell`](https://docs.rs/once_cell/latest/once_cell/). +/// A write-once primitive similar to [`std::sync::OnceLock`]. /// -/// Unlike `once_cell::sync` which blocks threads to achieve thread safety, this implementation -/// uses the Python GIL to mediate concurrent access. This helps in cases where `once_cell` or -/// `lazy_static`'s synchronization strategy can lead to deadlocks when interacting with the Python -/// GIL. For an example, see -#[doc = concat!("[the FAQ section](https://pyo3.rs/v", env!("CARGO_PKG_VERSION"), "/faq.html)")] +/// Unlike `OnceLock` which blocks threads to achieve thread safety, `GilOnceCell` +/// allows calls to [`get_or_init`][GILOnceCell::get_or_init] and +/// [`get_or_try_init`][GILOnceCell::get_or_try_init] to race to create an initialized value. +/// (It is still guaranteed that only one thread will ever write to the cell.) +/// +/// On Python versions that run with the Global Interpreter Lock (GIL), this helps to avoid +/// deadlocks between initialization and the GIL. For an example of such a deadlock, see +#[doc = concat!( + "[the FAQ section](https://pyo3.rs/v", + env!("CARGO_PKG_VERSION"), + "/faq.html#im-experiencing-deadlocks-using-pyo3-with-stdsynconcelock-stdsynclazylock-lazy_static-and-once_cell)" +)] /// of the guide. /// -/// Note that: -/// 1) `get_or_init` and `get_or_try_init` do not protect against infinite recursion -/// from reentrant initialization. -/// 2) If the initialization function `f` provided to `get_or_init` (or `get_or_try_init`) -/// temporarily releases the GIL (e.g. by calling `Python::import`) then it is possible -/// for a second thread to also begin initializing the `GILOnceCell`. Even when this -/// happens `GILOnceCell` guarantees that only **one** write to the cell ever occurs - -/// this is treated as a race, other threads will discard the value they compute and -/// return the result of the first complete computation. +/// Note that because the GIL blocks concurrent execution, in practice the means that +/// [`get_or_init`][GILOnceCell::get_or_init] and +/// [`get_or_try_init`][GILOnceCell::get_or_try_init] can only race if the initialization +/// function does work that can allow the GIL to switch threads (e.g. Python imports or calling +/// Python functions). On freethreaded Python without the GIL, the race creating wasted work is +/// more likely (and PyO3 may change GILOnceCell to behave more like the GIL build in the future). +/// +/// # Re-entrant initialization +/// +/// [`get_or_init`][GILOnceCell::get_or_init] and +/// [`get_or_try_init`][GILOnceCell::get_or_try_init] do not protect against infinite recursion +/// from reentrant initialization. /// /// # Examples /// @@ -112,7 +122,8 @@ impl Default for GILOnceCell { } // T: Send is needed for Sync because the thread which drops the GILOnceCell can be different -// to the thread which fills it. +// to the thread which fills it. (e.g. think scoped thread which fills the cell and then exits, +// leaving the cell to be dropped by the main thread). unsafe impl Sync for GILOnceCell {} unsafe impl Send for GILOnceCell {} From 268859a9fa3dbf8e6a9cd90e767a1fd06c293d82 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Fri, 27 Sep 2024 21:42:24 +0100 Subject: [PATCH 4/6] implement `Drop` for `GILOnceCell` --- src/sync.rs | 79 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index 86c82f18e56..1312b4719f8 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -8,7 +8,7 @@ use crate::{ types::{any::PyAnyMethods, PyString}, Bound, Py, PyResult, PyTypeCheck, Python, }; -use std::{cell::UnsafeCell, mem::MaybeUninit, sync::Once}; +use std::{cell::UnsafeCell, marker::PhantomData, mem::MaybeUninit, sync::Once}; #[cfg(not(Py_GIL_DISABLED))] use crate::PyVisit; @@ -113,6 +113,28 @@ unsafe impl Sync for GILProtected where T: Send {} pub struct GILOnceCell { once: Once, data: UnsafeCell>, + + /// (Copied from std::sync::OnceLock) + /// + /// `PhantomData` to make sure dropck understands we're dropping T in our Drop impl. + /// + /// ``` + /// use pyo3::Python; + /// use pyo3::sync::GILOnceCell; + /// + /// struct A<'a>(#[allow(dead_code)] &'a str); + /// + /// impl<'a> Drop for A<'a> { + /// fn drop(&mut self) {} + /// } + /// + /// let cell = GILOnceCell::new(); + /// { + /// let s = String::new(); + /// let _ = Python::with_gil(|py| cell.set(py,A(&s))); + /// } + /// ``` + _marker: PhantomData, } impl Default for GILOnceCell { @@ -133,6 +155,7 @@ impl GILOnceCell { Self { once: Once::new(), data: UnsafeCell::new(MaybeUninit::uninit()), + _marker: PhantomData, } } @@ -235,20 +258,23 @@ impl GILOnceCell { /// /// Has no effect and returns None if the cell has not yet been written. pub fn take(&mut self) -> Option { - // We leave `self` in a default (empty) state - std::mem::take(self).into_inner() + if self.once.is_completed() { + // Reset the cell to its default state so that it won't try to + // drop the value again. + self.once = Once::new(); + // SAFETY: the cell has been written. `self.once` has been reset, + // so when `self` is dropped the value won't be read again. + Some(unsafe { self.data.get_mut().assume_init_read() }) + } else { + None + } } /// Consumes the cell, returning the wrapped value. /// /// Returns None if the cell has not yet been written. - pub fn into_inner(self) -> Option { - if self.once.is_completed() { - // SAFETY: the cell has been written. - Some(unsafe { self.data.into_inner().assume_init() }) - } else { - None - } + pub fn into_inner(mut self) -> Option { + self.take() } } @@ -260,6 +286,7 @@ impl GILOnceCell> { let cloned = Self { once: Once::new(), data: UnsafeCell::new(MaybeUninit::uninit()), + _marker: PhantomData, }; if let Some(value) = self.get(py) { let _ = cloned.set(py, value.clone_ref(py)); @@ -320,6 +347,15 @@ where } } +impl Drop for GILOnceCell { + fn drop(&mut self) { + if self.once.is_completed() { + // SAFETY: the cell has been written. + unsafe { MaybeUninit::assume_init_drop(self.data.get_mut()) } + } + } +} + /// Interns `text` as a Python string and stores a reference to it in static storage. /// /// A reference to the same Python string is returned on each invocation. @@ -435,4 +471,27 @@ mod tests { assert!(cell_py.clone_ref(py).get(py).unwrap().is_none(py)); }) } + + #[test] + fn test_once_cell_drop() { + #[derive(Debug)] + struct RecordDrop<'a>(&'a mut bool); + + impl Drop for RecordDrop<'_> { + fn drop(&mut self) { + *self.0 = true; + } + } + + Python::with_gil(|py| { + let mut dropped = false; + let cell = GILOnceCell::new(); + cell.set(py, RecordDrop(&mut dropped)).unwrap(); + let drop_container = cell.get(py).unwrap(); + + assert!(!*drop_container.0); + drop(cell); + assert!(dropped); + }); + } } From f97875b8353e589e84f4afb97e6cba8bfdc947df Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Mon, 30 Sep 2024 19:49:41 +0100 Subject: [PATCH 5/6] suggestions from ngoldbaum Co-authored-by: Nathan Goldbaum --- src/sync.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index 1312b4719f8..87e21fcab85 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -80,10 +80,12 @@ unsafe impl Sync for GILProtected where T: Send {} /// /// Note that because the GIL blocks concurrent execution, in practice the means that /// [`get_or_init`][GILOnceCell::get_or_init] and -/// [`get_or_try_init`][GILOnceCell::get_or_try_init] can only race if the initialization -/// function does work that can allow the GIL to switch threads (e.g. Python imports or calling -/// Python functions). On freethreaded Python without the GIL, the race creating wasted work is -/// more likely (and PyO3 may change GILOnceCell to behave more like the GIL build in the future). +/// [`get_or_try_init`][GILOnceCell::get_or_try_init] may race if the initialization +/// function leads to the GIL being released and a thread context switch. This can +/// happen when importing or calling any Python code, as long as it releases the +/// GIL at some point. On free-threaded Python without any GIL, the race is +/// more likely since there is no GIL to prevent races. In the future, we may change +/// the semantics of GILOnceCell to behave more like the GIL build in the future. /// /// # Re-entrant initialization /// @@ -235,8 +237,8 @@ impl GILOnceCell { /// value which was not written. pub fn set(&self, _py: Python<'_>, value: T) -> Result<(), T> { let mut value = Some(value); - // NB this can block, but only for the duration which the first thread to complete - // initialization is writing to the cell. We therefore don't need to worry about + // NB this can block, but since this is only writing a single value and + // does not call arbitrary python code, we don't need to worry about // deadlocks with the GIL. self.once.call_once_force(|_| { // SAFETY: no other threads can be writing this value, because we are From dae322c0eee40e91f819ef4b04a0cb2ae3dad5f4 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Mon, 30 Sep 2024 19:51:01 +0100 Subject: [PATCH 6/6] fix compile error, adjust comments --- src/sync.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/sync.rs b/src/sync.rs index 87e21fcab85..52b98ea1182 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -81,10 +81,10 @@ unsafe impl Sync for GILProtected where T: Send {} /// Note that because the GIL blocks concurrent execution, in practice the means that /// [`get_or_init`][GILOnceCell::get_or_init] and /// [`get_or_try_init`][GILOnceCell::get_or_try_init] may race if the initialization -/// function leads to the GIL being released and a thread context switch. This can -/// happen when importing or calling any Python code, as long as it releases the +/// function leads to the GIL being released and a thread context switch. This can +/// happen when importing or calling any Python code, as long as it releases the /// GIL at some point. On free-threaded Python without any GIL, the race is -/// more likely since there is no GIL to prevent races. In the future, we may change +/// more likely since there is no GIL to prevent races. In the future, PyO3 may change /// the semantics of GILOnceCell to behave more like the GIL build in the future. /// /// # Re-entrant initialization @@ -120,7 +120,7 @@ pub struct GILOnceCell { /// /// `PhantomData` to make sure dropck understands we're dropping T in our Drop impl. /// - /// ``` + /// ```compile_error,E0597 /// use pyo3::Python; /// use pyo3::sync::GILOnceCell; /// @@ -214,6 +214,10 @@ impl GILOnceCell { // Note that f() could temporarily release the GIL, so it's possible that another thread // writes to this GILOnceCell before f() finishes. That's fine; we'll just have to discard // the value computed here and accept a bit of wasted computation. + + // TODO: on the freethreaded build, consider wrapping this pair of operations in a + // critical section (requires a critical section API which can use a PyMutex without + // an object.) let value = f()?; let _ = self.set(py, value); @@ -237,7 +241,7 @@ impl GILOnceCell { /// value which was not written. pub fn set(&self, _py: Python<'_>, value: T) -> Result<(), T> { let mut value = Some(value); - // NB this can block, but since this is only writing a single value and + // NB this can block, but since this is only writing a single value and // does not call arbitrary python code, we don't need to worry about // deadlocks with the GIL. self.once.call_once_force(|_| {