Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-128137: Update PyASCIIObject to handle interned field with the atomic operation #128196

Merged
merged 19 commits into from
Jan 5, 2025
Merged
36 changes: 20 additions & 16 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,17 @@ typedef struct {
PyObject_HEAD
Py_ssize_t length; /* Number of code points in the string */
Py_hash_t hash; /* Hash value; -1 if not set */
+ /* If interned is non-zero, the two references from the
+ dictionary to this object are *not* counted in ob_refcnt.
+ The possible values here are:
+ 0: Not Interned
+ 1: Interned
+ 2: Interned and Immortal
+ 3: Interned, Immortal, and Static
+ This categorization allows the runtime to determine the right
+ cleanup mechanism at runtime shutdown. */
+ uint16_t interned;
corona10 marked this conversation as resolved.
Show resolved Hide resolved
struct {
- /* If interned is non-zero, the two references from the
- dictionary to this object are *not* counted in ob_refcnt.
- The possible values here are:
- 0: Not Interned
- 1: Interned
- 2: Interned and Immortal
- 3: Interned, Immortal, and Static
- This categorization allows the runtime to determine the right
- cleanup mechanism at runtime shutdown. */
- unsigned int interned:2;
/* Character size:

- PyUnicode_1BYTE_KIND (1):
Expand All @@ -132,21 +132,21 @@ typedef struct {
* all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/
- unsigned int kind:3;
+ uint16_t kind:3;
corona10 marked this conversation as resolved.
Show resolved Hide resolved
/* Compact is with respect to the allocation scheme. Compact unicode
objects only require one memory block while non-compact objects use
one block for the PyUnicodeObject struct and another for its data
buffer. */
- unsigned int compact:1;
+ uint16_t compact:1;
/* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */
- unsigned int ascii:1;
+ uint16_t ascii:1;
/* The object is statically allocated. */
- unsigned int statically_allocated:1;
+ uint16_t statically_allocated:1;
/* Padding to ensure that PyUnicode_DATA() is always aligned to
4 bytes (see issue #19537 on m68k). */
- unsigned int :24;
+ uint16_t :10;
} state;
} PyASCIIObject;

Expand Down Expand Up @@ -195,7 +195,11 @@ typedef struct {

/* Use only if you know it's a string */
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
- return _PyASCIIObject_CAST(op)->state.interned;
+ #ifdef Py_GIL_DISABLED
+ return _Py_atomic_load_uint16_relaxed(&(_PyASCIIObject_CAST(op)->interned));
+ #else
+ return _PyASCIIObject_CAST(op)->interned;
+ #endif
kumaraditya303 marked this conversation as resolved.
Show resolved Hide resolved
}
#define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))

Expand Down
20 changes: 10 additions & 10 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
data = unicode + 1;
_PyUnicode_LENGTH(unicode) = size;
_PyUnicode_HASH(unicode) = -1;
- _PyUnicode_STATE(unicode).interned = 0;
+ _PyASCIIObject_CAST(unicode)->interned = 0;
_PyUnicode_STATE(unicode).kind = kind;
_PyUnicode_STATE(unicode).compact = 1;
_PyUnicode_STATE(unicode).ascii = is_ascii;
Expand Down Expand Up @@ -1711,7 +1711,7 @@ unicode_dealloc(PyObject *unicode)
_Py_SetImmortal(unicode);
return;
}
- switch (_PyUnicode_STATE(unicode).interned) {
+ switch (_PyASCIIObject_CAST(unicode)->interned) {
case SSTATE_NOT_INTERNED:
break;
case SSTATE_INTERNED_MORTAL:
Expand Down Expand Up @@ -1739,7 +1739,7 @@ unicode_dealloc(PyObject *unicode)
// so it can't cause trouble (except wasted memory)
// - if it wasn't popped, it'll remain interned
_Py_SetImmortal(unicode);
- _PyUnicode_STATE(unicode).interned = SSTATE_INTERNED_IMMORTAL;
+ _PyASCIIObject_CAST(unicode)->interned = SSTATE_INTERNED_IMMORTAL;
return;
}
if (r == 0) {
Expand Down Expand Up @@ -15470,7 +15470,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
#else
_PyUnicode_HASH(self) = _PyUnicode_HASH(unicode);
#endif
- _PyUnicode_STATE(self).interned = 0;
+ _PyASCIIObject_CAST(self)->interned = 0;
_PyUnicode_STATE(self).kind = kind;
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
Expand Down Expand Up @@ -15689,7 +15689,7 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
assert(r == NULL);
/* but just in case (for the non-debug build), handle this */
if (r != NULL && r != s) {
- assert(_PyUnicode_STATE(r).interned == SSTATE_INTERNED_IMMORTAL_STATIC);
+ assert(_PyASCIIObject_CAST(r)->interned == SSTATE_INTERNED_IMMORTAL_STATIC);
assert(_PyUnicode_CHECK(r));
Py_DECREF(s);
return Py_NewRef(r);
Expand All @@ -15699,7 +15699,7 @@ intern_static(PyInterpreterState *interp, PyObject *s /* stolen */)
Py_FatalError("failed to intern static string");
}

- _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL_STATIC;
+ _PyASCIIObject_CAST(s)->interned = SSTATE_INTERNED_IMMORTAL_STATIC;
return s;
}

Expand All @@ -15726,7 +15726,7 @@ immortalize_interned(PyObject *s)
_Py_DecRefTotal(_PyThreadState_GET());
}
#endif
- _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
+ FT_ATOMIC_STORE_UINT16_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_INTERNED_IMMORTAL);
_Py_SetImmortal(s);
}

Expand Down Expand Up @@ -15833,7 +15833,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,

/* NOT_INTERNED -> INTERNED_MORTAL */

- assert(_PyUnicode_STATE(s).interned == SSTATE_NOT_INTERNED);
+ assert(_PyASCIIObject_CAST(s)->interned == SSTATE_NOT_INTERNED);

if (!_Py_IsImmortal(s)) {
/* The two references in interned dict (key and value) are not counted.
Expand All @@ -15845,7 +15845,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
_Py_DecRefTotal(_PyThreadState_GET());
#endif
}
- _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
+ FT_ATOMIC_STORE_UINT16_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_INTERNED_MORTAL);

/* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */

Expand Down Expand Up @@ -15981,7 +15981,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
Py_UNREACHABLE();
}
if (!shared) {
- _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
+ FT_ATOMIC_STORE_UINT16_RELAXED(_PyASCIIObject_CAST(s)->interned, SSTATE_NOT_INTERNED);
}
}
#ifdef INTERNED_STATS
Expand Down
Loading