From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 6 Jan 2022 16:12:28 +0100 Subject: [PATCH] 00373: bpo-46006: Revert "bpo-40521: Per-interpreter interned strings This reverts commit ea251806b8dffff11b30d2182af1e589caf88acf. Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for the main interpreter. Keep _PyUnicode_ClearInterned() changes avoiding the creation of a temporary Python list object. Leave the PyInterpreterState structure unchanged to keep the ABI backward compatibility with Python 3.10.0: rename the "interned" member to "unused_interned". Fixes https://bugzilla.redhat.com/2030621 --- Include/internal/pycore_interp.h | 12 +--- .../2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst | 5 ++ Objects/typeobject.c | 22 +++++++ Objects/unicodeobject.c | 63 ++++++++++++++----- 4 files changed, 76 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index bfd082b588..4307b61ca3 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -71,15 +71,9 @@ struct _Py_unicode_state { PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; - /* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) - */ - PyObject *interned; + // Unused member kept for ABI backward compatibility with Python 3.10.0: + // see bpo-46006. + PyObject *unused_interned; // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() struct _Py_unicode_ids ids; diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst new file mode 100644 index 0000000000..3acd2b0939 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst @@ -0,0 +1,5 @@ +Fix a regression when a type method like ``__init__()`` is modified in a +subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type +``update_slot()``. Revert the change which made the Unicode dictionary of +interned strings compatible with subinterpreters: the internal interned +dictionary is shared again by all interpreters. Patch by Victor Stinner. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 02046e5f2e..b23e36a420 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -50,6 +50,11 @@ typedef struct PySlot_Offset { } PySlot_Offset; +/* bpo-40521: Interned strings are shared by all subinterpreters */ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS +# define INTERN_NAME_STRINGS +#endif + /* alphabetical order */ _Py_IDENTIFIER(__abstractmethods__); _Py_IDENTIFIER(__annotations__); @@ -3988,6 +3993,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) if (name == NULL) return -1; } +#ifdef INTERN_NAME_STRINGS if (!PyUnicode_CHECK_INTERNED(name)) { PyUnicode_InternInPlace(&name); if (!PyUnicode_CHECK_INTERNED(name)) { @@ -3997,6 +4003,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) return -1; } } +#endif } else { /* Will fail in _PyObject_GenericSetAttrWithDict. */ @@ -8344,10 +8351,17 @@ _PyTypes_InitSlotDefs(void) for (slotdef *p = slotdefs; p->name; p++) { /* Slots must be ordered by their offset in the PyHeapTypeObject. */ assert(!p[1].name || p->offset <= p[1].offset); +#ifdef INTERN_NAME_STRINGS p->name_strobj = PyUnicode_InternFromString(p->name); if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) { return _PyStatus_NO_MEMORY(); } +#else + p->name_strobj = PyUnicode_FromString(p->name); + if (!p->name_strobj) { + return _PyStatus_NO_MEMORY(); + } +#endif } slotdefs_initialized = 1; return _PyStatus_OK(); @@ -8372,16 +8386,24 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; assert(PyUnicode_CheckExact(name)); +#ifdef INTERN_NAME_STRINGS assert(PyUnicode_CHECK_INTERNED(name)); +#endif assert(slotdefs_initialized); pp = ptrs; for (p = slotdefs; p->name; p++) { assert(PyUnicode_CheckExact(p->name_strobj)); assert(PyUnicode_CheckExact(name)); +#ifdef INTERN_NAME_STRINGS if (p->name_strobj == name) { *pp++ = p; } +#else + if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) { + *pp++ = p; + } +#endif } *pp = NULL; for (pp = ptrs; *pp; pp++) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c72871074b..077cf8d7f4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -211,6 +211,22 @@ extern "C" { # define OVERALLOCATE_FACTOR 4 #endif +/* bpo-40521: Interned strings are shared by all interpreters. */ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS +# define INTERNED_STRINGS +#endif + +/* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) +*/ +#ifdef INTERNED_STRINGS +static PyObject *interned = NULL; +#endif static struct _Py_unicode_state* get_unicode_state(void) @@ -1936,7 +1952,7 @@ unicode_dealloc(PyObject *unicode) case SSTATE_INTERNED_MORTAL: { - struct _Py_unicode_state *state = get_unicode_state(); +#ifdef INTERNED_STRINGS /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 @@ -1944,12 +1960,13 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). */ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(state->interned, unicode) != 0) { + if (PyDict_DelItem(interned, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } assert(Py_REFCNT(unicode) == 1); Py_SET_REFCNT(unicode, 0); +#endif break; } @@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (PyUnicode_CHECK_INTERNED(left)) return 0; +#ifdef INTERNED_STRINGS assert(_PyUnicode_HASH(right_uni) != -1); Py_hash_t hash = _PyUnicode_HASH(left); if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) { return 0; } +#endif return unicode_compare_eq(left, right_uni); } @@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p) return; } +#ifdef INTERNED_STRINGS if (PyUnicode_READY(s) == -1) { PyErr_Clear(); return; } - struct _Py_unicode_state *state = get_unicode_state(); - if (state->interned == NULL) { - state->interned = PyDict_New(); - if (state->interned == NULL) { + if (interned == NULL) { + interned = PyDict_New(); + if (interned == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } } - PyObject *t = PyDict_SetDefault(state->interned, s, s); + PyObject *t = PyDict_SetDefault(interned, s, s); if (t == NULL) { PyErr_Clear(); return; @@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p) this. */ Py_SET_REFCNT(s, Py_REFCNT(s) - 2); _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; +#else + // PyDict expects that interned strings have their hash + // (PyASCIIObject.hash) already computed. + (void)unicode_hash(s); +#endif } - void PyUnicode_InternImmortal(PyObject **p) { @@ -15900,11 +15923,15 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { - struct _Py_unicode_state *state = &interp->unicode; - if (state->interned == NULL) { + if (!_Py_IsMainInterpreter(interp)) { + // interned dict is shared by all interpreters return; } - assert(PyDict_CheckExact(state->interned)); + + if (interned == NULL) { + return; + } + assert(PyDict_CheckExact(interned)); /* Interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the @@ -15912,13 +15939,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", - PyDict_GET_SIZE(state->interned)); + PyDict_GET_SIZE(interned)); Py_ssize_t immortal_size = 0, mortal_size = 0; #endif Py_ssize_t pos = 0; PyObject *s, *ignored_value; - while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { + while (PyDict_Next(interned, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); switch (PyUnicode_CHECK_INTERNED(s)) { @@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) mortal_size, immortal_size); #endif - PyDict_Clear(state->interned); - Py_CLEAR(state->interned); + PyDict_Clear(interned); + Py_CLEAR(interned); } @@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before - assert(state->interned == NULL); + if (_Py_IsMainInterpreter(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(interned == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec);