diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 91799137101280..46a01c8e591709 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -109,7 +109,7 @@ typedef struct { 3: Interned, Immortal, and Static This categorization allows the runtime to determine the right cleanup mechanism at runtime shutdown. */ - unsigned int interned:2; + uint16_t interned; /* Character size: - PyUnicode_1BYTE_KIND (1): @@ -132,21 +132,23 @@ typedef struct { * all characters are in the range U+0000-U+10FFFF * at least one character is in the range U+10000-U+10FFFF */ - unsigned int kind:3; + unsigned short kind:3; /* Compact is with respect to the allocation scheme. Compact unicode objects only require one memory block while non-compact objects use one block for the PyUnicodeObject struct and another for its data buffer. */ - unsigned int compact:1; + unsigned short compact:1; /* The string only contains characters in the range U+0000-U+007F (ASCII) and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is set, use the PyASCIIObject structure. */ - unsigned int ascii:1; + unsigned short ascii:1; /* The object is statically allocated. */ - unsigned int statically_allocated:1; + unsigned short statically_allocated:1; /* Padding to ensure that PyUnicode_DATA() is always aligned to - 4 bytes (see issue #19537 on m68k). */ - unsigned int :24; + 4 bytes (see issue #19537 on m68k). We use unsigned short to avoid + the extra four bytes on 32-bit Windows. This is a feature restricted to + specific compilers, including GCC, MSVC, Clang and IBM's XL compiler. 
*/ + unsigned short :10; } state; } PyASCIIObject; @@ -195,7 +197,11 @@ typedef struct { /* Use only if you know it's a string */ static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_uint16_relaxed(&_PyASCIIObject_CAST(op)->state.interned); +#else return _PyASCIIObject_CAST(op)->state.interned; +#endif } #define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-01-40-12.gh-issue-128137.gsTwr_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-01-40-12.gh-issue-128137.gsTwr_.rst new file mode 100644 index 00000000000000..a3b7cde7f67676 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-01-40-12.gh-issue-128137.gsTwr_.rst @@ -0,0 +1,2 @@ +Update the :c:type:`PyASCIIObject` layout so that the interned field can be +accessed with atomic operations. Patch by Donghee Na. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5e532ce0f348c4..3eafa2381c1a4d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15729,7 +15729,7 @@ immortalize_interned(PyObject *s) _Py_DecRefTotal(_PyThreadState_GET()); } #endif - _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; + FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); _Py_SetImmortal(s); } @@ -15848,7 +15848,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, _Py_DecRefTotal(_PyThreadState_GET()); #endif } - _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; + FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ @@ -15984,7 +15984,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) Py_UNREACHABLE(); } if (!shared) { - _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED; + FT_ATOMIC_STORE_UINT16_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_NOT_INTERNED); } } #ifdef INTERNED_STATS