| |
@@ -0,0 +1,302 @@
|
| |
+ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
| |
+ From: Victor Stinner <vstinner@python.org>
|
| |
+ Date: Thu, 6 Jan 2022 16:12:28 +0100
|
| |
+ Subject: [PATCH] 00373: bpo-46006: Revert "bpo-40521: Per-interpreter interned
|
| |
+ strings"
|
| |
+
|
| |
+ This reverts commit ea251806b8dffff11b30d2182af1e589caf88acf.
|
| |
+
|
| |
+ Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for
|
| |
+ the main interpreter.
|
| |
+
|
| |
+ Keep _PyUnicode_ClearInterned() changes avoiding the creation of a
|
| |
+ temporary Python list object.
|
| |
+
|
| |
+ Leave the PyInterpreterState structure unchanged to keep the ABI
|
| |
+ backward compatibility with Python 3.10.0: rename the "interned"
|
| |
+ member to "unused_interned".
|
| |
+
|
| |
+ Fixes https://bugzilla.redhat.com/2030621
|
| |
+ ---
|
| |
+ Include/internal/pycore_interp.h | 12 +---
|
| |
+ .../2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst | 5 ++
|
| |
+ Objects/typeobject.c | 22 +++++++
|
| |
+ Objects/unicodeobject.c | 63 ++++++++++++++-----
|
| |
+ 4 files changed, 76 insertions(+), 26 deletions(-)
|
| |
+ create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst
|
| |
+
|
| |
+ diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
|
| |
+ index bfd082b588..4307b61ca3 100644
|
| |
+ --- a/Include/internal/pycore_interp.h
|
| |
+ +++ b/Include/internal/pycore_interp.h
|
| |
+ @@ -71,15 +71,9 @@ struct _Py_unicode_state {
|
| |
+ PyObject *latin1[256];
|
| |
+ struct _Py_unicode_fs_codec fs_codec;
|
| |
+
|
| |
+ - /* This dictionary holds all interned unicode strings. Note that references
|
| |
+ - to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
| |
+ - When the interned string reaches a refcnt of 0 the string deallocation
|
| |
+ - function will delete the reference from this dictionary.
|
| |
+ -
|
| |
+ - Another way to look at this is that to say that the actual reference
|
| |
+ - count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
| |
+ - */
|
| |
+ - PyObject *interned;
|
| |
+ + // Unused member kept for ABI backward compatibility with Python 3.10.0:
|
| |
+ + // see bpo-46006.
|
| |
+ + PyObject *unused_interned;
|
| |
+
|
| |
+ // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
|
| |
+ struct _Py_unicode_ids ids;
|
| |
+ diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst
|
| |
+ new file mode 100644
|
| |
+ index 0000000000..3acd2b0939
|
| |
+ --- /dev/null
|
| |
+ +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst
|
| |
+ @@ -0,0 +1,5 @@
|
| |
+ +Fix a regression when a type method like ``__init__()`` is modified in a
|
| |
+ +subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
|
| |
+ +``update_slot()``. Revert the change which made the Unicode dictionary of
|
| |
+ +interned strings compatible with subinterpreters: the internal interned
|
| |
+ +dictionary is shared again by all interpreters. Patch by Victor Stinner.
|
| |
+ diff --git a/Objects/typeobject.c b/Objects/typeobject.c
|
| |
+ index 02046e5f2e..b23e36a420 100644
|
| |
+ --- a/Objects/typeobject.c
|
| |
+ +++ b/Objects/typeobject.c
|
| |
+ @@ -50,6 +50,11 @@ typedef struct PySlot_Offset {
|
| |
+ } PySlot_Offset;
|
| |
+
|
| |
+
|
| |
+ +/* bpo-40521: Interned strings are shared by all subinterpreters */
|
| |
+ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
| |
+ +# define INTERN_NAME_STRINGS
|
| |
+ +#endif
|
| |
+ +
|
| |
+ /* alphabetical order */
|
| |
+ _Py_IDENTIFIER(__abstractmethods__);
|
| |
+ _Py_IDENTIFIER(__annotations__);
|
| |
+ @@ -3988,6 +3993,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
| |
+ if (name == NULL)
|
| |
+ return -1;
|
| |
+ }
|
| |
+ +#ifdef INTERN_NAME_STRINGS
|
| |
+ if (!PyUnicode_CHECK_INTERNED(name)) {
|
| |
+ PyUnicode_InternInPlace(&name);
|
| |
+ if (!PyUnicode_CHECK_INTERNED(name)) {
|
| |
+ @@ -3997,6 +4003,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
| |
+ return -1;
|
| |
+ }
|
| |
+ }
|
| |
+ +#endif
|
| |
+ }
|
| |
+ else {
|
| |
+ /* Will fail in _PyObject_GenericSetAttrWithDict. */
|
| |
+ @@ -8344,10 +8351,17 @@ _PyTypes_InitSlotDefs(void)
|
| |
+ for (slotdef *p = slotdefs; p->name; p++) {
|
| |
+ /* Slots must be ordered by their offset in the PyHeapTypeObject. */
|
| |
+ assert(!p[1].name || p->offset <= p[1].offset);
|
| |
+ +#ifdef INTERN_NAME_STRINGS
|
| |
+ p->name_strobj = PyUnicode_InternFromString(p->name);
|
| |
+ if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
|
| |
+ return _PyStatus_NO_MEMORY();
|
| |
+ }
|
| |
+ +#else
|
| |
+ + p->name_strobj = PyUnicode_FromString(p->name);
|
| |
+ + if (!p->name_strobj) {
|
| |
+ + return _PyStatus_NO_MEMORY();
|
| |
+ + }
|
| |
+ +#endif
|
| |
+ }
|
| |
+ slotdefs_initialized = 1;
|
| |
+ return _PyStatus_OK();
|
| |
+ @@ -8372,16 +8386,24 @@ update_slot(PyTypeObject *type, PyObject *name)
|
| |
+ int offset;
|
| |
+
|
| |
+ assert(PyUnicode_CheckExact(name));
|
| |
+ +#ifdef INTERN_NAME_STRINGS
|
| |
+ assert(PyUnicode_CHECK_INTERNED(name));
|
| |
+ +#endif
|
| |
+
|
| |
+ assert(slotdefs_initialized);
|
| |
+ pp = ptrs;
|
| |
+ for (p = slotdefs; p->name; p++) {
|
| |
+ assert(PyUnicode_CheckExact(p->name_strobj));
|
| |
+ assert(PyUnicode_CheckExact(name));
|
| |
+ +#ifdef INTERN_NAME_STRINGS
|
| |
+ if (p->name_strobj == name) {
|
| |
+ *pp++ = p;
|
| |
+ }
|
| |
+ +#else
|
| |
+ + if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
|
| |
+ + *pp++ = p;
|
| |
+ + }
|
| |
+ +#endif
|
| |
+ }
|
| |
+ *pp = NULL;
|
| |
+ for (pp = ptrs; *pp; pp++) {
|
| |
+ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
|
| |
+ index c72871074b..077cf8d7f4 100644
|
| |
+ --- a/Objects/unicodeobject.c
|
| |
+ +++ b/Objects/unicodeobject.c
|
| |
+ @@ -211,6 +211,22 @@ extern "C" {
|
| |
+ # define OVERALLOCATE_FACTOR 4
|
| |
+ #endif
|
| |
+
|
| |
+ +/* bpo-40521: Interned strings are shared by all interpreters. */
|
| |
+ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
| |
+ +# define INTERNED_STRINGS
|
| |
+ +#endif
|
| |
+ +
|
| |
+ +/* This dictionary holds all interned unicode strings. Note that references
|
| |
+ + to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
| |
+ + When the interned string reaches a refcnt of 0 the string deallocation
|
| |
+ + function will delete the reference from this dictionary.
|
| |
+ +
|
| |
+ + Another way to look at this is to say that the actual reference
|
| |
+ + count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
| |
+ +*/
|
| |
+ +#ifdef INTERNED_STRINGS
|
| |
+ +static PyObject *interned = NULL;
|
| |
+ +#endif
|
| |
+
|
| |
+ static struct _Py_unicode_state*
|
| |
+ get_unicode_state(void)
|
| |
+ @@ -1936,7 +1952,7 @@ unicode_dealloc(PyObject *unicode)
|
| |
+
|
| |
+ case SSTATE_INTERNED_MORTAL:
|
| |
+ {
|
| |
+ - struct _Py_unicode_state *state = get_unicode_state();
|
| |
+ +#ifdef INTERNED_STRINGS
|
| |
+ /* Revive the dead object temporarily. PyDict_DelItem() removes two
|
| |
+ references (key and value) which were ignored by
|
| |
+ PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
|
| |
+ @@ -1944,12 +1960,13 @@ unicode_dealloc(PyObject *unicode)
|
| |
+ PyDict_DelItem(). */
|
| |
+ assert(Py_REFCNT(unicode) == 0);
|
| |
+ Py_SET_REFCNT(unicode, 3);
|
| |
+ - if (PyDict_DelItem(state->interned, unicode) != 0) {
|
| |
+ + if (PyDict_DelItem(interned, unicode) != 0) {
|
| |
+ _PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
| |
+ NULL);
|
| |
+ }
|
| |
+ assert(Py_REFCNT(unicode) == 1);
|
| |
+ Py_SET_REFCNT(unicode, 0);
|
| |
+ +#endif
|
| |
+ break;
|
| |
+ }
|
| |
+
|
| |
+ @@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
| |
+ if (PyUnicode_CHECK_INTERNED(left))
|
| |
+ return 0;
|
| |
+
|
| |
+ +#ifdef INTERNED_STRINGS
|
| |
+ assert(_PyUnicode_HASH(right_uni) != -1);
|
| |
+ Py_hash_t hash = _PyUnicode_HASH(left);
|
| |
+ if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
|
| |
+ return 0;
|
| |
+ }
|
| |
+ +#endif
|
| |
+
|
| |
+ return unicode_compare_eq(left, right_uni);
|
| |
+ }
|
| |
+ @@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)
|
| |
+ return;
|
| |
+ }
|
| |
+
|
| |
+ +#ifdef INTERNED_STRINGS
|
| |
+ if (PyUnicode_READY(s) == -1) {
|
| |
+ PyErr_Clear();
|
| |
+ return;
|
| |
+ }
|
| |
+
|
| |
+ - struct _Py_unicode_state *state = get_unicode_state();
|
| |
+ - if (state->interned == NULL) {
|
| |
+ - state->interned = PyDict_New();
|
| |
+ - if (state->interned == NULL) {
|
| |
+ + if (interned == NULL) {
|
| |
+ + interned = PyDict_New();
|
| |
+ + if (interned == NULL) {
|
| |
+ PyErr_Clear(); /* Don't leave an exception */
|
| |
+ return;
|
| |
+ }
|
| |
+ }
|
| |
+
|
| |
+ - PyObject *t = PyDict_SetDefault(state->interned, s, s);
|
| |
+ + PyObject *t = PyDict_SetDefault(interned, s, s);
|
| |
+ if (t == NULL) {
|
| |
+ PyErr_Clear();
|
| |
+ return;
|
| |
+ @@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)
|
| |
+ this. */
|
| |
+ Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
|
| |
+ _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
|
| |
+ +#else
|
| |
+ + // PyDict expects that interned strings have their hash
|
| |
+ + // (PyASCIIObject.hash) already computed.
|
| |
+ + (void)unicode_hash(s);
|
| |
+ +#endif
|
| |
+ }
|
| |
+
|
| |
+ -
|
| |
+ void
|
| |
+ PyUnicode_InternImmortal(PyObject **p)
|
| |
+ {
|
| |
+ @@ -15900,11 +15923,15 @@ PyUnicode_InternFromString(const char *cp)
|
| |
+ void
|
| |
+ _PyUnicode_ClearInterned(PyInterpreterState *interp)
|
| |
+ {
|
| |
+ - struct _Py_unicode_state *state = &interp->unicode;
|
| |
+ - if (state->interned == NULL) {
|
| |
+ + if (!_Py_IsMainInterpreter(interp)) {
|
| |
+ + // interned dict is shared by all interpreters
|
| |
+ return;
|
| |
+ }
|
| |
+ - assert(PyDict_CheckExact(state->interned));
|
| |
+ +
|
| |
+ + if (interned == NULL) {
|
| |
+ + return;
|
| |
+ + }
|
| |
+ + assert(PyDict_CheckExact(interned));
|
| |
+
|
| |
+ /* Interned unicode strings are not forcibly deallocated; rather, we give
|
| |
+ them their stolen references back, and then clear and DECREF the
|
| |
+ @@ -15912,13 +15939,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
|
| |
+
|
| |
+ #ifdef INTERNED_STATS
|
| |
+ fprintf(stderr, "releasing %zd interned strings\n",
|
| |
+ - PyDict_GET_SIZE(state->interned));
|
| |
+ + PyDict_GET_SIZE(interned));
|
| |
+
|
| |
+ Py_ssize_t immortal_size = 0, mortal_size = 0;
|
| |
+ #endif
|
| |
+ Py_ssize_t pos = 0;
|
| |
+ PyObject *s, *ignored_value;
|
| |
+ - while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
|
| |
+ + while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
|
| |
+ assert(PyUnicode_IS_READY(s));
|
| |
+
|
| |
+ switch (PyUnicode_CHECK_INTERNED(s)) {
|
| |
+ @@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
|
| |
+ mortal_size, immortal_size);
|
| |
+ #endif
|
| |
+
|
| |
+ - PyDict_Clear(state->interned);
|
| |
+ - Py_CLEAR(state->interned);
|
| |
+ + PyDict_Clear(interned);
|
| |
+ + Py_CLEAR(interned);
|
| |
+ }
|
| |
+
|
| |
+
|
| |
+ @@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
|
| |
+ {
|
| |
+ struct _Py_unicode_state *state = &interp->unicode;
|
| |
+
|
| |
+ - // _PyUnicode_ClearInterned() must be called before
|
| |
+ - assert(state->interned == NULL);
|
| |
+ + if (_Py_IsMainInterpreter(interp)) {
|
| |
+ + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
|
| |
+ + assert(interned == NULL);
|
| |
+ + }
|
| |
+
|
| |
+ _PyUnicode_FiniEncodings(&state->fs_codec);
|
| |
+
|
| |
Fixes https://bugzilla.redhat.com/2030621
Fixes https://bugzilla.redhat.com/2034962