From 0e995ad361ec91755581bb06693fba9d0bf6c49e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 12 Mar 2025 15:26:52 -0700 Subject: [PATCH 01/45] wip: update type slots, stop-the-world --- Include/internal/pycore_interp.h | 1 + Objects/typeobject.c | 53 ++++++++++++++++++++++ Python/clinic/sysmodule.c.h | 20 +++++++- Python/pystate.c | 2 + Python/sysmodule.c | 22 +++++++++ Tools/tsan/suppressions_free_threading.txt | 6 +-- 6 files changed, 100 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index c247c7270d1caa..0b801df1126b61 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -57,6 +57,7 @@ struct _stoptheworld_state { bool requested; // Set when a pause is requested. bool world_stopped; // Set when the world is stopped. bool is_global; // Set when contained in PyRuntime struct. + Py_ssize_t world_stops; // Number of times the world was stopped. PyEvent stop_event; // Set when thread_countdown reaches zero. Py_ssize_t thread_countdown; // Number of threads that must pause. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bc840ed51ffe4c..61f87cd8c9b7b8 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3490,6 +3490,9 @@ static int update_slot(PyTypeObject *, PyObject *); static void fixup_slot_dispatchers(PyTypeObject *); static int type_new_set_names(PyTypeObject *); static int type_new_init_subclass(PyTypeObject *, PyObject *); +#ifdef Py_GIL_DISABLED +static bool has_slotdef(PyObject *); +#endif /* * Helpers for __dict__ descriptor. We don't want to expose the dicts @@ -5943,6 +5946,23 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } +#ifdef Py_GIL_DISABLED +static int +update_slot_world_stopped(PyTypeObject *type, PyObject *name) +{ + // Modification of type slots is protected by the global type + // lock. However, type slots are read non-atomically without holding the + // type lock. So, we need to stop-the-world while modifying slots, in + // order to avoid data races. This is unfortunately quite expensive. + int ret; + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + ret = update_slot(type, name); + _PyEval_StartTheWorld(interp); + return ret; +} +#endif + static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) @@ -5972,9 +5992,15 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } +#if Py_GIL_DISABLED + if (is_dunder_name(name) && has_slotdef(name)) { + return update_slot_world_stopped(type, name); + } +#else if (is_dunder_name(name)) { return update_slot(type, name); } +#endif return 0; } @@ -11002,6 +11028,21 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) #undef ptrs } +#ifdef Py_GIL_DISABLED +// Return true if "name" corresponds to at least one slot definition. This is +// used to avoid calling update_slot() if is_dunder_name() is true but it's +// not actually a slot. +static bool +has_slotdef(PyObject *name) +{ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) { + return true; + } + } + return false; +} +#endif /* Common code for update_slots_callback() and fixup_slot_dispatchers(). * @@ -11241,6 +11282,7 @@ fixup_slot_dispatchers(PyTypeObject *type) END_TYPE_LOCK(); } +// Called when __bases__ is re-assigned. 
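// A hedged sketch (illustration only, not part of the patch) of the
// stop-the-world bracket that update_slot_world_stopped() above applies and
// that update_all_slots() below repeats; all names appear in this patch:
//
//     PyInterpreterState *interp = _PyInterpreterState_GET();
//     _PyEval_StopTheWorld(interp);   // park every other thread
//     update_slot(type, name);        // no concurrent slot readers now
//     _PyEval_StartTheWorld(interp);  // resume the other threads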
static void update_all_slots(PyTypeObject* type) { @@ -11248,6 +11290,13 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); + // Similar to update_slot_world_stopped(), this is required to + // avoid races. We do it once here rather than once per-slot. +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); +#endif + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ type_modified_unlocked(type); @@ -11255,6 +11304,10 @@ update_all_slots(PyTypeObject* type) /* update_slot returns int but can't actually fail */ update_slot(type, p->name_strobj); } + +#ifdef Py_GIL_DISABLED + _PyEval_StartTheWorld(interp); +#endif } diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 1e53624d4d45d7..9a1444f636faa4 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1711,6 +1711,24 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) return return_value; } +PyDoc_STRVAR(sys__get_world_stops__doc__, +"_get_world_stops($module, /)\n" +"--\n" +"\n" +"Return the number of times the \"stop-the-world\" lock was used."); + +#define SYS__GET_WORLD_STOPS_METHODDEF \ + {"_get_world_stops", (PyCFunction)sys__get_world_stops, METH_NOARGS, sys__get_world_stops__doc__}, + +static PyObject * +sys__get_world_stops_impl(PyObject *module); + +static PyObject * +sys__get_world_stops(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys__get_world_stops_impl(module); +} + #ifndef SYS_GETWINDOWSVERSION_METHODDEF #define SYS_GETWINDOWSVERSION_METHODDEF #endif /* !defined(SYS_GETWINDOWSVERSION_METHODDEF) */ @@ -1754,4 +1772,4 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=1e5f608092c12636 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=35e1d29a46e54085 input=a9049054013a1b77]*/ diff --git a/Python/pystate.c b/Python/pystate.c index fcd12d1b933360..68cf495d15313d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2355,6 +2355,7 @@ start_the_world(struct _stoptheworld_state *stw) assert(PyMutex_IsLocked(&stw->mutex)); HEAD_LOCK(runtime); + stw->world_stops++; stw->requested = 0; stw->world_stopped = 0; // Switch threads back to the detached state. @@ -2730,6 +2731,7 @@ _PyGILState_Fini(PyInterpreterState *interp) return; } interp->runtime->gilstate.autoInterpreterState = NULL; + //fprintf(stderr, "world stops %zd\n", interp->stoptheworld.world_stops); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index adaa5ee1c74aa5..ee4ee2765c1bfe 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2527,6 +2527,27 @@ sys__is_gil_enabled_impl(PyObject *module) #endif } +/*[clinic input] +sys._get_world_stops + +Return the number of times the "stop-the-world" condition was true. 
+[clinic start generated code]*/ + +static PyObject * +sys__get_world_stops_impl(PyObject *module) +/*[clinic end generated code: output=7886d32b71a94e72 input=44a9bde7e07b30e3]*/ +{ + Py_ssize_t stops; +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + stops = interp->stoptheworld.world_stops; +#else + stops = 0; +#endif + return PyLong_FromLong(stops); +} + + static PerfMapState perf_map_state; @@ -2703,6 +2724,7 @@ static PyMethodDef sys_methods[] = { #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF + SYS__GET_WORLD_STOPS_METHODDEF SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel }; diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index fd47c85af1adb1..d2763714ecf1b7 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -14,7 +14,7 @@ race_top:assign_version_tag race_top:_multiprocessing_SemLock_acquire_impl -race_top:_Py_slot_tp_getattr_hook +#race_top:_Py_slot_tp_getattr_hook race_top:dump_traceback race_top:fatal_error race_top:_multiprocessing_SemLock_release_impl @@ -22,7 +22,7 @@ race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:_PyObject_TryGetInstanceAttribute race_top:PyUnstable_InterpreterFrame_GetLine -race_top:type_modified_unlocked +#race_top:type_modified_unlocked race_top:write_thread_id # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) @@ -32,7 +32,7 @@ race_top:rangeiter_next race_top:mi_block_set_nextx # gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict) -race_top:update_one_slot +#race_top:update_one_slot # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create From b173f1769c638f21c100e55aefc5e0e60cc84609 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 13 Mar 2025 15:03:04 -0700 Subject: [PATCH 02/45] Remove unneeded atomics for tp_flags. --- Include/internal/pycore_object.h | 2 +- Include/object.h | 6 +----- Objects/typeobject.c | 6 ++---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 0b686b416ec193..7a0793f7b44474 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -311,7 +311,7 @@ extern int _PyDict_CheckConsistency(PyObject *mp, int check_content); // Fast inlined version of PyType_HasFeature() static inline int _PyType_HasFeature(PyTypeObject *type, unsigned long feature) { - return ((FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags) & feature) != 0); + return ((type->tp_flags) & feature) != 0; } extern void _PyType_InitCache(PyInterpreterState *interp); diff --git a/Include/object.h b/Include/object.h index 8cc83abb8574e3..d23918c2da67f9 100644 --- a/Include/object.h +++ b/Include/object.h @@ -776,11 +776,7 @@ PyType_HasFeature(PyTypeObject *type, unsigned long feature) // PyTypeObject is opaque in the limited C API flags = PyType_GetFlags(type); #else -# ifdef Py_GIL_DISABLED - flags = _Py_atomic_load_ulong_relaxed(&type->tp_flags); -# else - flags = type->tp_flags; -# endif + flags = type->tp_flags; #endif return ((flags & feature) != 0); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 61f87cd8c9b7b8..ad8431908f1922 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -353,9 +353,7 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // held when flags are modified. 
ASSERT_TYPE_LOCK_HELD(); } - // Since PyType_HasFeature() reads the flags without holding the type - // lock, we need an atomic store here. - FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); + tp->tp_flags = flags; } static void @@ -3690,7 +3688,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds) unsigned long PyType_GetFlags(PyTypeObject *type) { - return FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags); + return type->tp_flags; } From d4ce112310c8bfe2683a2ccedc4323ec591c771f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 13 Mar 2025 16:25:24 -0700 Subject: [PATCH 03/45] Use stop-the-world for tp_flag changes too. --- Objects/typeobject.c | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ad8431908f1922..96802e588a6e7d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -84,6 +84,17 @@ class object "PyObject *" "&PyBaseObject_Type" #endif +// Modification of type slots and type flags is protected by the global type +// lock. However, the slots and flags are read non-atomically without holding +// the type lock. So, we need to stop-the-world while modifying these, in +// order to avoid data races. This is unfortunately a bit expensive. +#ifdef Py_GIL_DISABLED +// If defined, type slot updates (after initial creation) will stop-the-world. +#define TYPE_SLOT_UPDATE_NEEDS_STOP +// If defined, type flag updates (after initial creation) will stop-the-world. +#define TYPE_FLAGS_UPDATE_NEEDS_STOP +#endif + #define PyTypeObject_CAST(op) ((PyTypeObject *)(op)) typedef struct PySlot_Offset { @@ -1582,9 +1593,10 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) BEGIN_TYPE_LOCK(); type_modified_unlocked(type); if (abstract) - type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT); + _PyType_SetFlags(type, 0, Py_TPFLAGS_IS_ABSTRACT); else - type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT); + _PyType_SetFlags(type, Py_TPFLAGS_IS_ABSTRACT, 0); + END_TYPE_LOCK(); return 0; @@ -5780,7 +5792,17 @@ void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { BEGIN_TYPE_LOCK(); - type_set_flags_with_mask(self, mask, flags); + unsigned long new_flags = (self->tp_flags & ~mask) | flags; + if (new_flags != self->tp_flags) { +#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); +#endif + self->tp_flags = new_flags; +#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP + _PyEval_StartTheWorld(interp); +#endif + } END_TYPE_LOCK(); } @@ -5944,14 +5966,10 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } -#ifdef Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP static int update_slot_world_stopped(PyTypeObject *type, PyObject *name) { - // Modification of type slots is protected by the global type - // lock. However, type slots are read non-atomically without holding the - // type lock. So, we need to stop-the-world while modifying slots, in - // order to avoid data races. This is unfortunately quite expensive. 
int ret; PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); ret = update_slot(type, name); _PyEval_StartTheWorld(interp); return ret; } #endif @@ -5990,7 +6008,7 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } -#if Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP if (is_dunder_name(name) && has_slotdef(name)) { return update_slot_world_stopped(type, name); } @@ -11026,10 +11044,9 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) #undef ptrs } -#ifdef Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP // Return true if "name" corresponds to at least one slot definition. This is -// used to avoid calling update_slot() if is_dunder_name() is true but it's -// not actually a slot. +// a more accurate but more expensive test compared to is_dunder_name(). static bool has_slotdef(PyObject *name) { @@ -11288,9 +11305,9 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP // Similar to update_slot_world_stopped(), this is required to // avoid races. We do it once here rather than once per-slot. -#ifdef Py_GIL_DISABLED PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); #endif @@ -11303,7 +11320,7 @@ update_all_slots(PyTypeObject* type) update_slot(type, p->name_strobj); } -#ifdef Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP _PyEval_StartTheWorld(interp); #endif } From 44eb332fd5754f00cdd7c30de890738e1167580e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 11:54:59 -0700 Subject: [PATCH 04/45] Remove 'world_stops' and 'sys._get_world_stops'. This counter would be better tracked as part of the pystats info. --- Include/internal/pycore_interp.h | 1 - Python/clinic/sysmodule.c.h | 20 +------------------- Python/pystate.c | 2 -- Python/sysmodule.c | 22 ---------------------- 4 files changed, 1 insertion(+), 44 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0b801df1126b61..c247c7270d1caa 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -57,7 +57,6 @@ struct _stoptheworld_state { bool requested; // Set when a pause is requested. bool world_stopped; // Set when the world is stopped. bool is_global; // Set when contained in PyRuntime struct. - Py_ssize_t world_stops; // Number of times the world was stopped. PyEvent stop_event; // Set when thread_countdown reaches zero. Py_ssize_t thread_countdown; // Number of threads that must pause.
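The commit message above suggests pystats as a better home for this counter. A minimal sketch of that direction, assuming a hypothetical world_stops field in the pystats structures (neither the field nor this wiring exists in the series):

    #ifdef Py_STATS
    if (_Py_stats) {
        _Py_stats->world_stops++;   // assumed field, shown for illustration
    }
    #endif

The increment would live in start_the_world(), mirroring the line this patch deletes from Python/pystate.c.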
diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 9a1444f636faa4..1e53624d4d45d7 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1711,24 +1711,6 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) return return_value; } -PyDoc_STRVAR(sys__get_world_stops__doc__, -"_get_world_stops($module, /)\n" -"--\n" -"\n" -"Return the number of times the \"stop-the-world\" lock was used."); - -#define SYS__GET_WORLD_STOPS_METHODDEF \ - {"_get_world_stops", (PyCFunction)sys__get_world_stops, METH_NOARGS, sys__get_world_stops__doc__}, - -static PyObject * -sys__get_world_stops_impl(PyObject *module); - -static PyObject * -sys__get_world_stops(PyObject *module, PyObject *Py_UNUSED(ignored)) -{ - return sys__get_world_stops_impl(module); -} - #ifndef SYS_GETWINDOWSVERSION_METHODDEF #define SYS_GETWINDOWSVERSION_METHODDEF #endif /* !defined(SYS_GETWINDOWSVERSION_METHODDEF) */ @@ -1772,4 +1754,4 @@ sys__get_world_stops(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=35e1d29a46e54085 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1e5f608092c12636 input=a9049054013a1b77]*/ diff --git a/Python/pystate.c b/Python/pystate.c index 68cf495d15313d..fcd12d1b933360 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2355,7 +2355,6 @@ start_the_world(struct _stoptheworld_state *stw) assert(PyMutex_IsLocked(&stw->mutex)); HEAD_LOCK(runtime); - stw->world_stops++; stw->requested = 0; stw->world_stopped = 0; // Switch threads back to the detached state. @@ -2731,7 +2730,6 @@ _PyGILState_Fini(PyInterpreterState *interp) return; } interp->runtime->gilstate.autoInterpreterState = NULL; - //fprintf(stderr, "world stops %zd\n", interp->stoptheworld.world_stops); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index ee4ee2765c1bfe..adaa5ee1c74aa5 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2527,27 +2527,6 @@ sys__is_gil_enabled_impl(PyObject *module) #endif } -/*[clinic input] -sys._get_world_stops - -Return the number of times the "stop-the-world" condition was true. -[clinic start generated code]*/ - -static PyObject * -sys__get_world_stops_impl(PyObject *module) -/*[clinic end generated code: output=7886d32b71a94e72 input=44a9bde7e07b30e3]*/ -{ - Py_ssize_t stops; -#ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - stops = interp->stoptheworld.world_stops; -#else - stops = 0; -#endif - return PyLong_FromLong(stops); -} - - static PerfMapState perf_map_state; @@ -2724,7 +2703,6 @@ static PyMethodDef sys_methods[] = { #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF - SYS__GET_WORLD_STOPS_METHODDEF SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel }; From d132fab96784d297d30250d2c92c431651058ef2 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 12:06:43 -0700 Subject: [PATCH 05/45] Improve code comments. --- Objects/typeobject.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 96802e588a6e7d..65f6442edd2350 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5979,6 +5979,8 @@ update_slot_world_stopped(PyTypeObject *type, PyObject *name) } #endif +// Called by type_setattro(). Updates both the type dict and +// any type slots that correspond to the modified entry. 
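// Condensed from the surrounding hunks, the control flow described above
// looks like this (illustration, not literal patch content):
//
//     if (is_dunder_name(name) && has_slotdef(name)) {
//         // the name may map to a type slot: stop the world, then run
//         // update_slot() while no other thread can read the slots
//         return update_slot_world_stopped(type, name);
//     }
//     return 0;   // ordinary attribute: no slot update needed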
static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) @@ -11306,8 +11308,9 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); #ifdef TYPE_SLOT_UPDATE_NEEDS_STOP - // Similar to update_slot_world_stopped(), this is required to - // avoid races. We do it once here rather than once per-slot. + // Similar to update_slot_world_stopped(), this is required to avoid + // races. We don't use update_slot_world_stopped() here because we want + // to stop once rather than once per slot. PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); #endif From 658bcd52ab4a85134bed94eb6f30ce94a03924f7 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 12:07:13 -0700 Subject: [PATCH 06/45] Remove TSAN suppressions that seem unneeded. --- Tools/tsan/suppressions_free_threading.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index d2763714ecf1b7..17a0d8dc6a0f01 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -14,7 +14,6 @@ race_top:assign_version_tag race_top:_multiprocessing_SemLock_acquire_impl -#race_top:_Py_slot_tp_getattr_hook race_top:dump_traceback race_top:fatal_error race_top:_multiprocessing_SemLock_release_impl @@ -22,7 +21,6 @@ race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:_PyObject_TryGetInstanceAttribute race_top:PyUnstable_InterpreterFrame_GetLine -#race_top:type_modified_unlocked race_top:write_thread_id # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) @@ -31,9 +29,6 @@ race_top:rangeiter_next # gh-129748: test.test_free_threading.test_slots.TestSlots.test_object race_top:mi_block_set_nextx -# gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict) -#race_top:update_one_slot - # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create From ef2f07b47475faeb631f8ac89f8bc4eec865e6ed Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 13:08:26 -0700 Subject: [PATCH 07/45] Add NEWS file. --- .../2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst new file mode 100644 index 00000000000000..b26977628de136 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst @@ -0,0 +1,6 @@ +In the free-threaded build, avoid data races caused by updating type slots +or type flags after the type was initially created. For those (typically +rare) cases, use the stop-the-world mechanism. Remove the use of atomics +when reading or writing type flags. The use of atomics is not sufficient to +avoid races (since flags are sometimes read without a lock and without +atomics) and is no longer required. From ce8536db3f59037a44db845f148e9610dfd080ad Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 22 Mar 2025 15:34:45 -0700 Subject: [PATCH 08/45] Use mutex rather than critical sections. For TYPE_LOCK, replace critical sections with a simple mutex.
A number of changes were needed to avoid deadlocking on reentrancy (trying to re-acquire the mutex). Split _PyType_LookupRefAndVersion() into a locked and unlocked version. Remove atomic operations where they are not required. Remove some cases where TYPE_LOCK is held but not required. --- Include/internal/pycore_typeobject.h | 2 + Include/object.h | 4 +- Objects/typeobject.c | 578 +++++++++++++++++---------- 3 files changed, 372 insertions(+), 212 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 581153344a8e05..677212452eb9ee 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -144,6 +144,8 @@ struct types_state { managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; } for_extensions; PyMutex mutex; + // used to check correct usage of the above mutex + unsigned long long mutex_tid; // Borrowed references to type objects whose // tp_version_tag % TYPE_VERSION_CACHE_SIZE diff --git a/Include/object.h b/Include/object.h index d23918c2da67f9..82b7ede695d819 100644 --- a/Include/object.h +++ b/Include/object.h @@ -576,8 +576,8 @@ given type object has a specified feature. /* Objects behave like an unbound method */ #define Py_TPFLAGS_METHOD_DESCRIPTOR (1UL << 17) -/* Unused. Legacy flag */ -#define Py_TPFLAGS_VALID_VERSION_TAG (1UL << 19) +/* Type structure is potentially exposed (revealed) to other threads */ +#define Py_TPFLAGS_EXPOSED (1UL << 19) /* Type is abstract and cannot be instantiated */ #define Py_TPFLAGS_IS_ABSTRACT (1UL << 20) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 65f6442edd2350..2408a1cf3c85de 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -62,39 +62,100 @@ class object "PyObject *" "&PyBaseObject_Type" // in odd behaviors w.r.t. running with the GIL as the outer type lock could // be released and reacquired during a subclass update if there's contention // on the subclass lock. +// +// Modification of type slots and type flags is protected by the global +// types lock. However, the slots and flags are read non-atomically +// without holding the type lock. So, we need to stop the world while +// modifying these, in order to avoid data races. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex +#ifdef Py_DEBUG +// Used to check for correct use of the TYPE_LOCK mutex. It is a simple +// mutex and does not support re-entrancy. If we already hold the lock and +// try to acquire it again with the same thread, it is a bug in the code. +#define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid +#endif +
+static bool +types_world_is_stopped(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return interp->stoptheworld.world_stopped; +} +
+// Checks that either the type has not yet been exposed (revealed) to other +// threads (it was newly created and the Py_TPFLAGS_EXPOSED flag is not set +// yet) or the world is stopped and we can safely update it without races.
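// For illustration only (not part of the patch): a typical caller combines
// the two permitted states like this, where set_example_slot is a
// hypothetical helper:
//
//     static void
//     set_example_slot(PyTypeObject *tp, newfunc value)
//     {
//         ASSERT_NEW_OR_STOPPED(tp);  // type still private, or world stopped
//         tp->tp_new = value;         // plain, non-atomic write is safe here
//     }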
+#define ASSERT_NEW_OR_STOPPED(tp) \ + assert((((tp)->tp_flags & Py_TPFLAGS_EXPOSED) == 0) || types_world_is_stopped()) + +static void +types_stop_world(void) +{ + assert(!types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + assert(types_world_is_stopped()); +} + +static void +types_start_world(void) +{ + assert(types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StartTheWorld(interp); + assert(!types_world_is_stopped()); +} + +static bool +types_mutex_is_owned(void) +{ + PyThread_ident_t tid = PyThread_get_thread_ident_ex(); + return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; +} + +static void +types_mutex_set_owned(PyThread_ident_t tid) +{ + _Py_atomic_store_ullong_relaxed(TYPE_LOCK_TID, tid); +} + +static void +types_mutex_lock(void) +{ + if (types_world_is_stopped()) { + return; + } + assert(!types_mutex_is_owned()); + PyMutex_Lock(TYPE_LOCK); + types_mutex_set_owned(PyThread_get_thread_ident_ex()); +} -#define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() +static void +types_mutex_unlock(void) +{ + if (types_world_is_stopped()) { + return; + } + types_mutex_set_owned(0); + PyMutex_Unlock(TYPE_LOCK); +} #define ASSERT_TYPE_LOCK_HELD() \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + {if (!types_world_is_stopped()) assert(types_mutex_is_owned());} #else -#define BEGIN_TYPE_LOCK() -#define END_TYPE_LOCK() -#define BEGIN_TYPE_DICT_LOCK(d) -#define END_TYPE_DICT_LOCK() +#define types_mutex_lock() +#define types_mutex_unlock() +#define types_world_is_stopped() 1 +#define types_stop_world() +#define types_start_world() #define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_NEW_OR_STOPPED(tp) #endif -// Modification of type slots and type flags is protected by the global type -// lock. However, the slots and flags are read non-atomically without holding -// the type lock. So, we need to stop-the-world while modifying these, in -// order to avoid data races. This is unfortunately a bit expensive. -#ifdef Py_GIL_DISABLED -// If defined, type slot updates (after initial creation) will stop-the-world. -#define TYPE_SLOT_UPDATE_NEEDS_STOP -// If defined, type flag updates (after initial creation) will stop-the-world. -#define TYPE_FLAGS_UPDATE_NEEDS_STOP -#endif - #define PyTypeObject_CAST(op) ((PyTypeObject *)(op)) typedef struct PySlot_Offset { @@ -111,6 +172,9 @@ releasebuffer_call_python(PyObject *self, Py_buffer *buffer); static PyObject * slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static PyObject * +lookup_method_optional_lock(PyObject *self, PyObject *attr, int *unbound, bool lock_held); + static PyObject * lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound); @@ -364,6 +428,7 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // held when flags are modified. 
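// Sketch (not patch content) of how the helpers defined above pair up in
// later hunks: the mutex serializes version-tag bookkeeping, while
// stop-the-world covers the actual slot and flag writes:
//
//     types_mutex_lock();      // e.g. around assign_version_tag()
//     ...assign or invalidate tp_version_tag...
//     types_mutex_unlock();
//
//     types_stop_world();      // no readers left: slot/flag writes are safe
//     ...write tp_flags or type slots...
//     types_start_world();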
ASSERT_TYPE_LOCK_HELD(); } + ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -371,6 +436,7 @@ static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -496,10 +562,8 @@ _PyType_GetBases(PyTypeObject *self) { PyObject *res; - BEGIN_TYPE_LOCK(); res = lookup_tp_bases(self); Py_INCREF(res); - END_TYPE_LOCK(); return res; } @@ -508,6 +572,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); + ASSERT_NEW_OR_STOPPED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -551,35 +616,21 @@ clear_tp_bases(PyTypeObject *self, int final) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { - ASSERT_TYPE_LOCK_HELD(); return self->tp_mro; } PyObject * _PyType_GetMRO(PyTypeObject *self) { -#ifdef Py_GIL_DISABLED - PyObject *mro = _Py_atomic_load_ptr_relaxed(&self->tp_mro); - if (mro == NULL) { - return NULL; - } - if (_Py_TryIncrefCompare(&self->tp_mro, mro)) { - return mro; - } - - BEGIN_TYPE_LOCK(); - mro = lookup_tp_mro(self); - Py_XINCREF(mro); - END_TYPE_LOCK(); - return mro; -#else + // This is safe in the free-threaded build because tp_mro cannot be + // assigned unless the type is new or if world is stopped. return Py_XNewRef(lookup_tp_mro(self)); -#endif } static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { + ASSERT_NEW_OR_STOPPED(self); assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_mro can probably be statically allocated for each @@ -1008,10 +1059,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification - BEGIN_TYPE_LOCK(); + types_mutex_lock(); assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); - END_TYPE_LOCK(); + types_mutex_unlock(); return 0; } @@ -1142,9 +1193,9 @@ PyType_Modified(PyTypeObject *type) return; } - BEGIN_TYPE_LOCK(); + types_mutex_lock(); type_modified_unlocked(type); - END_TYPE_LOCK(); + types_mutex_unlock(); } static int @@ -1169,15 +1220,16 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { int unbound; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); if (custom) { PyObject *mro_meth, *type_mro_meth; - mro_meth = lookup_maybe_method( - (PyObject *)type, &_Py_ID(mro), &unbound); + mro_meth = lookup_method_optional_lock( + (PyObject *)type, &_Py_ID(mro), &unbound, true); if (mro_meth == NULL) { goto clear; } - type_mro_meth = lookup_maybe_method( - (PyObject *)&PyType_Type, &_Py_ID(mro), &unbound); + type_mro_meth = lookup_method_optional_lock( + (PyObject *)&PyType_Type, &_Py_ID(mro), &unbound, true); if (type_mro_meth == NULL) { Py_DECREF(mro_meth); goto clear; @@ -1229,9 +1281,9 @@ void _PyType_SetVersion(PyTypeObject *tp, unsigned int version) { - BEGIN_TYPE_LOCK(); + types_mutex_lock(); set_version_unlocked(tp, version); - END_TYPE_LOCK(); + types_mutex_unlock(); } PyTypeObject * @@ -1254,7 +1306,11 @@ _PyType_LookupByVersion(unsigned int version) unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp) { +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_uint32_acquire(&tp->tp_version_tag); +#else return tp->tp_version_tag; +#endif } @@ -1317,9 +1373,9 @@ int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { 
PyInterpreterState *interp = _PyInterpreterState_GET(); int assigned; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); assigned = assign_version_tag(interp, type); - END_TYPE_LOCK(); + types_mutex_unlock(); return assigned; } @@ -1590,14 +1646,13 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) return -1; } - BEGIN_TYPE_LOCK(); + types_stop_world(); type_modified_unlocked(type); if (abstract) - _PyType_SetFlags(type, 0, Py_TPFLAGS_IS_ABSTRACT); + type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT); else - _PyType_SetFlags(type, Py_TPFLAGS_IS_ABSTRACT, 0); - - END_TYPE_LOCK(); + type_set_flags_with_mask(type, Py_TPFLAGS_IS_ABSTRACT, 0); + types_start_world(); return 0; } @@ -1619,7 +1674,6 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) PyTypeObject *type = PyTypeObject_CAST(tp); PyObject *mro; - BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(type); if (mro == NULL) { mro = Py_None; @@ -1627,7 +1681,6 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) Py_INCREF(mro); } - END_TYPE_LOCK(); return mro; } @@ -1651,6 +1704,7 @@ static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; int res = mro_internal(type, &old_mro); @@ -1722,6 +1776,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) return -1; } assert(new_bases != NULL); + assert(types_world_is_stopped()); if (!PyTuple_Check(new_bases)) { PyErr_Format(PyExc_TypeError, @@ -1856,9 +1911,18 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); int res; - BEGIN_TYPE_LOCK(); + bool need_stop = !types_world_is_stopped(); + if (need_stop) { + // This function can be re-entered. We want to avoid trying to stop + // the world a second time if that happens. Example call chain: + // mro_invoke() -> call_unbound_noarg() -> type_setattro() -> + // type_set_bases() + types_stop_world(); + } res = type_set_bases_unlocked(type, new_bases); - END_TYPE_LOCK(); + if (need_stop) { + types_start_world(); + } return res; } @@ -2810,10 +2874,26 @@ _PyObject_LookupSpecialMethod(PyObject *self, PyObject *attr, PyObject **self_or return res; } +#ifdef Py_GIL_DISABLED +static PyObject *type_lookup_lock_held(PyTypeObject *type, PyObject *name, unsigned int *version); +#endif + static PyObject * -lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) +lookup_method_optional_lock(PyObject *self, PyObject *attr, int *unbound, bool lock_held) { - PyObject *res = _PyType_LookupRef(Py_TYPE(self), attr); + PyObject *res; +#ifdef Py_GIL_DISABLED + if (lock_held) { + // The TYPE_LOCK mutex is already held by the caller. + res = type_lookup_lock_held(Py_TYPE(self), attr, NULL); + } + else { + // If the MCACHE lookup fails, this will acquire the TYPE_LOCK mutex. 
+ res = _PyType_LookupRef(Py_TYPE(self), attr); + } +#else + res = _PyType_LookupRef(Py_TYPE(self), attr); +#endif if (res == NULL) { return NULL; } @@ -2832,6 +2912,12 @@ lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) return res; } +static PyObject * +lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) +{ + return lookup_method_optional_lock(self, attr, unbound, false); +} + static PyObject * lookup_method(PyObject *self, PyObject *attr, int *unbound) { @@ -2956,11 +3042,16 @@ tail_contains(PyObject *tuple, Py_ssize_t whence, PyObject *o) static PyObject * class_name(PyObject *cls) { +#ifdef Py_GIL_DISABLED + // Avoid re-entrancy and use tp_name + return type_name(cls, NULL); +#else PyObject *name; if (PyObject_GetOptionalAttr(cls, &_Py_ID(__name__), &name) == 0) { name = PyObject_Repr(cls); } return name; +#endif } static int @@ -3125,12 +3216,11 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size) } static PyObject * -mro_implementation_unlocked(PyTypeObject *type) +mro_implementation(PyTypeObject *type) { - ASSERT_TYPE_LOCK_HELD(); - if (!_PyType_IsReady(type)) { - if (PyType_Ready(type) < 0) + int res = PyType_Ready(type); + if (res < 0) return NULL; } @@ -3208,16 +3298,6 @@ mro_implementation_unlocked(PyTypeObject *type) return result; } -static PyObject * -mro_implementation(PyTypeObject *type) -{ - PyObject *mro; - BEGIN_TYPE_LOCK(); - mro = mro_implementation_unlocked(type); - END_TYPE_LOCK(); - return mro; -} - /*[clinic input] type.mro @@ -3289,20 +3369,29 @@ mro_invoke(PyTypeObject *type) PyObject *new_mro; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { int unbound; - PyObject *mro_meth = lookup_method( - (PyObject *)type, &_Py_ID(mro), &unbound); - if (mro_meth == NULL) + PyObject *mro_meth = lookup_method_optional_lock( + (PyObject *)type, &_Py_ID(mro), &unbound, true); + if (mro_meth == NULL) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_AttributeError, "mro"); + } return NULL; + } + // FIXME: review unlock + types_mutex_unlock(); + // Possible re-entrancy, we don't know what this custom mro() method might do mro_result = call_unbound_noarg(unbound, mro_meth, (PyObject *)type); + types_mutex_lock(); Py_DECREF(mro_meth); } else { - mro_result = mro_implementation_unlocked(type); + mro_result = mro_implementation(type); } if (mro_result == NULL) return NULL; @@ -3352,6 +3441,7 @@ static int mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; int reent; @@ -3401,9 +3491,8 @@ static int mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; - BEGIN_TYPE_LOCK(); + ASSERT_NEW_OR_STOPPED(type); res = mro_internal_unlocked(type, 0, p_old_mro); - END_TYPE_LOCK(); return res; } @@ -4482,6 +4571,7 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); + type->tp_flags |= Py_TPFLAGS_EXPOSED; return (PyObject *)type; @@ -5195,6 +5285,7 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); + type->tp_flags |= Py_TPFLAGS_EXPOSED; finally: if (PyErr_Occurred()) { @@ -5329,7 +5420,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) } PyObject *res = NULL; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); PyObject *mro = lookup_tp_mro(type); // The type must be ready @@ -5356,7 +5447,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) break; } } - END_TYPE_LOCK(); + 
types_mutex_unlock(); if (res == NULL) { PyErr_Format( @@ -5488,8 +5579,6 @@ PyObject_GetItemData(PyObject *obj) static PyObject * find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { - ASSERT_TYPE_LOCK_HELD(); - Py_hash_t hash = _PyObject_HashFast(name); if (hash == -1) { *error = -1; @@ -5567,12 +5656,15 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio return old_name; } -#if Py_GIL_DISABLED - static void -update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, - unsigned int version_tag, PyObject *value) +maybe_update_cache(struct type_cache_entry *entry, PyObject *name, + unsigned int version_tag, PyObject *value) { + if (version_tag == 0) { + return; // 0 is not a valid version + } + PyObject *old_value; +#if Py_GIL_DISABLED _PySeqLock_LockWrite(&entry->sequence); // update the entry @@ -5584,16 +5676,16 @@ update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, return; } - PyObject *old_value = update_cache(entry, name, version_tag, value); + old_value = update_cache(entry, name, version_tag, value); // Then update sequence to the next valid value _PySeqLock_UnlockWrite(&entry->sequence); - +#else + old_value = update_cache(entry, name, version_tag, value); +#endif Py_DECREF(old_value); } -#endif - void _PyTypes_AfterFork(void) { @@ -5611,22 +5703,93 @@ _PyTypes_AfterFork(void) #endif } -/* Internal API to look for a name through the MRO. - This returns a strong reference, and doesn't set an exception! - If nonzero, version is set to the value of type->tp_version at the time of - the lookup. -*/ -PyObject * -_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) +static struct type_cache_entry * +type_get_cache_entry(PyTypeObject *type, PyObject *name) { + unsigned int h = MCACHE_HASH_METHOD(type, name); + struct type_cache *cache = get_type_cache(); + return &cache->hashtable[h]; +} + +// Implementation of _PyType_LookupRefAndVersion() for non-freethreaded build +// or for when the TYPE_LOCK mutex is already held. +static PyObject * +type_lookup_lock_held(PyTypeObject *type, PyObject *name, unsigned int *version) +{ + ASSERT_TYPE_LOCK_HELD(); PyObject *res; int error; PyInterpreterState *interp = _PyInterpreterState_GET(); - unsigned int h = MCACHE_HASH_METHOD(type, name); - struct type_cache *cache = get_type_cache(); - struct type_cache_entry *entry = &cache->hashtable[h]; + struct type_cache_entry *entry = type_get_cache_entry(type, name); + if (entry->version == type->tp_version_tag && + entry->name == name) { + assert(type->tp_version_tag); + OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); + Py_XINCREF(entry->value); + if (version != NULL) { + *version = entry->version; + } + return entry->value; + } + OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); + + /* We may end up clearing live exceptions below, so make sure it's ours. */ + assert(!PyErr_Occurred()); + + unsigned int assigned_version = 0; // 0 is not a valid version + if (MCACHE_CACHEABLE_NAME(name)) { + if (assign_version_tag(interp, type)) { + assigned_version = type->tp_version_tag; + } + } + // Calling find_name_in_mro() might cause the type version to change. For + // example, if a __hash__ or __eq__ method mutates the types. 
Since this + // case is expected to be rare, it seems better to not explicitly check + // for it (by checking if the version changed) and to do the useless cache + // update anyhow. + res = find_name_in_mro(type, name, &error); + + if (!error) { + /* Only put NULL results into cache if there was no error. */ + maybe_update_cache(entry, name, assigned_version, res); + if (version != NULL) { + *version = assigned_version; + } + return res; + } + else { + // It's not ideal to clear the error condition, but this function is + // documented as not setting an exception, and I don't want to change + // that. E.g., when PyType_Ready() can't proceed, it won't set the + // "ready" flag, so future attempts to ready the same type will call + // it again -- hopefully in a context that propagates the exception + // out. + if (error == -1) { + PyErr_Clear(); + } + if (version != NULL) { + // 0 is not a valid version + *version = 0; + } + return NULL; + } +} + #ifdef Py_GIL_DISABLED +// Implementation of _PyType_LookupRefAndVersion() for freethreaded build for +// when the TYPE_LOCK mutex is not already held. Avoids acquiring the lock if +// the lookup can be completed using the cache. +PyObject * +type_lookup_gil_disabled(PyTypeObject *type, PyObject *name, unsigned int *version) +{ + PyObject *res; + int error; + PyInterpreterState *interp = _PyInterpreterState_GET(); + + struct type_cache_entry *entry = type_get_cache_entry(type, name); // synchronize-with other writing threads by doing an acquire load on the sequence while (1) { uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence); @@ -5657,48 +5820,42 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve break; } } -#else - if (entry->version == type->tp_version_tag && - entry->name == name) { - assert(type->tp_version_tag); - OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - Py_XINCREF(entry->value); - if (version != NULL) { - *version = entry->version; - } - return entry->value; - } -#endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); - // We need to atomically do the lookup and capture the version before - // anyone else can modify our mro or mutate the type. - - int has_version = 0; - unsigned int assigned_version = 0; - BEGIN_TYPE_LOCK(); - res = find_name_in_mro(type, name, &error); + unsigned int assigned_version = 0; // 0 is not a valid version if (MCACHE_CACHEABLE_NAME(name)) { - has_version = assign_version_tag(interp, type); - assigned_version = type->tp_version_tag; - } - END_TYPE_LOCK(); - - /* Only put NULL results into cache if there was no error. */ - if (error) { - /* It's not ideal to clear the error condition, - but this function is documented as not setting - an exception, and I don't want to change that. - E.g., when PyType_Ready() can't proceed, it won't - set the "ready" flag, so future attempts to ready - the same type will call it again -- hopefully - in a context that propagates the exception out. - */ + types_mutex_lock(); + if (assign_version_tag(interp, type)) { + assigned_version = type->tp_version_tag; + } + types_mutex_unlock(); + } + // Calling find_name_in_mro() might cause the type version to change. For + // example, if a __hash__ or __eq__ method mutates the types. 
Since this + // case is expected to be rare, it seems better to not explicitly check + // for it (by checking if the version changed) and to do the useless cache + // update anyhow. + res = find_name_in_mro(type, name, &error); + + if (!error) { + /* Only put NULL results into cache if there was no error. */ + maybe_update_cache(entry, name, assigned_version, res); + if (version != NULL) { + *version = assigned_version; + } + return res; + } + else { + // It's not ideal to clear the error condition, but this function is + // documented as not setting an exception, and I don't want to change + // that. E.g., when PyType_Ready() can't proceed, it won't set the + // "ready" flag, so future attempts to ready the same type will call + // it again -- hopefully in a context that propagates the exception + // out. if (error == -1) { PyErr_Clear(); } @@ -5708,20 +5865,22 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve } return NULL; } +} +#endif - if (has_version) { -#if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, assigned_version, res); +/* Internal API to look for a name through the MRO. + This returns a strong reference, and doesn't set an exception! + If nonzero, version is set to the value of type->tp_version at the time of + the lookup. +*/ +PyObject * +_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) +{ +#ifdef Py_GIL_DISABLED + return type_lookup_gil_disabled(type, name, version); #else - PyObject *old_value = update_cache(entry, name, assigned_version, res); - Py_DECREF(old_value); + return type_lookup_lock_held(type, name, version); #endif - } - if (version != NULL) { - // 0 is not a valid version - *version = has_version ? assigned_version : 0; - } - return res; } /* Internal API to look for a name through the MRO. 
@@ -5751,7 +5910,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return 0; } int can_cache; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version; #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(init); @@ -5759,7 +5918,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, if (can_cache) { FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init); } - END_TYPE_LOCK(); + types_mutex_unlock(); return can_cache; } @@ -5770,7 +5929,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor return 0; } int can_cache; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version; // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): @@ -5784,33 +5943,29 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); } - END_TYPE_LOCK(); + types_mutex_unlock(); return can_cache; } void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); + types_mutex_lock(); unsigned long new_flags = (self->tp_flags & ~mask) | flags; if (new_flags != self->tp_flags) { -#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); -#endif - self->tp_flags = new_flags; -#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP - _PyEval_StartTheWorld(interp); -#endif + types_stop_world(); + // can't use new_flags here since they could be out-of-date + self->tp_flags = (self->tp_flags & ~mask) | flags; + types_start_world(); } - END_TYPE_LOCK(); + types_mutex_unlock(); } int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version) { int err; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); err = validate(ty); if (!err) { if(assign_version_tag(_PyInterpreterState_GET(), ty)) { @@ -5820,7 +5975,7 @@ _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_ err = -1; } } - END_TYPE_LOCK(); + types_mutex_unlock(); return err; } @@ -5850,9 +6005,9 @@ set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags) void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); + types_stop_world(); set_flags_recursive(self, mask, flags); - END_TYPE_LOCK(); + types_start_world(); } /* This is similar to PyObject_GenericGetAttr(), @@ -5966,15 +6121,14 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP +#ifdef Py_GIL_DISABLED static int update_slot_world_stopped(PyTypeObject *type, PyObject *name) { int ret; - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); + types_stop_world(); ret = update_slot(type, name); - _PyEval_StartTheWorld(interp); + types_start_world(); return ret; } #endif @@ -5985,6 +6139,8 @@ static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) { + ASSERT_TYPE_LOCK_HELD(); + // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor // of the current value as it can observe the cache in an inconsistent @@ -6006,12 +6162,16 @@ 
type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%U'", ((PyTypeObject*)type)->tp_name, name); + // FIXME: reentrancy possible + types_mutex_unlock(); _PyObject_SetAttributeErrorContext((PyObject *)type, name); + types_mutex_lock(); return -1; } -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP +#ifdef Py_GIL_DISABLED if (is_dunder_name(name) && has_slotdef(name)) { + // update is potentially changing one or more slots return update_slot_world_stopped(type, name); } #else @@ -6078,22 +6238,25 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) PyObject *dict = type->tp_dict; if (dict == NULL) { // We don't just do PyType_Ready because we could already be readying - BEGIN_TYPE_LOCK(); + types_mutex_lock(); dict = type->tp_dict; if (dict == NULL) { dict = type->tp_dict = PyDict_New(); } - END_TYPE_LOCK(); + types_mutex_unlock(); if (dict == NULL) { res = -1; goto done; } } - BEGIN_TYPE_DICT_LOCK(dict); + // FIXME: can this deadlock? if so, how to avoid + types_mutex_lock(); + Py_BEGIN_CRITICAL_SECTION(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); assert(_PyType_CheckConsistency(type)); - END_TYPE_DICT_LOCK(); + Py_END_CRITICAL_SECTION(); + types_mutex_unlock(); done: Py_DECREF(name); @@ -6189,10 +6352,10 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, clear_static_type_objects(interp, type, isbuiltin, final); if (final) { - BEGIN_TYPE_LOCK(); + types_mutex_lock(); type_clear_flags(type, Py_TPFLAGS_READY); set_version_unlocked(type, 0); - END_TYPE_LOCK(); + types_mutex_unlock(); } _PyStaticType_ClearWeakRefs(interp, type); @@ -7023,13 +7186,12 @@ object_set_class(PyObject *self, PyObject *value, void *closure) } #ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); + types_stop_world(); #endif PyTypeObject *oldto = Py_TYPE(self); int res = object_set_class_world_stopped(self, newto); #ifdef Py_GIL_DISABLED - _PyEval_StartTheWorld(interp); + types_start_world(); #endif if (res == 0) { if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) { @@ -8428,6 +8590,7 @@ static int type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!initial) { @@ -8503,6 +8666,7 @@ static int type_ready_inherit(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; @@ -8700,6 +8864,7 @@ static int type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -8791,14 +8956,14 @@ PyType_Ready(PyTypeObject *type) } int res; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); if (!(type->tp_flags & Py_TPFLAGS_READY)) { res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); } - END_TYPE_LOCK(); + types_mutex_unlock(); return res; } @@ -8832,9 +8997,9 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, managed_static_type_state_init(interp, self, isbuiltin, initial); int res; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); res = type_ready(self, initial); - END_TYPE_LOCK(); + types_mutex_unlock(); if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); managed_static_type_state_clear(interp, self, isbuiltin, initial); @@ -9340,9 +9505,9 @@ hackcheck(PyObject 
*self, setattrofunc func, const char *what) } int res; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); res = hackcheck_unlocked(self, func, what); - END_TYPE_LOCK(); + types_mutex_unlock(); return res; } @@ -10563,9 +10728,9 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) { releasebufferproc base_releasebuffer; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); - END_TYPE_LOCK(); + types_mutex_unlock(); if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); @@ -11046,7 +11211,7 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) #undef ptrs } -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP +#ifdef Py_GIL_DISABLED // Return true if "name" corresponds to at least one slot definition. This is // a more accurate but more expensive test compared to is_dunder_name(). static bool @@ -11119,6 +11284,7 @@ static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11235,6 +11401,7 @@ static int update_slots_callback(PyTypeObject *type, void *data) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { @@ -11253,6 +11420,8 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); + assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11289,14 +11458,14 @@ fixup_slot_dispatchers(PyTypeObject *type) // This lock isn't strictly necessary because the type has not been // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro // where we'd like to assert that the type is locked. - BEGIN_TYPE_LOCK(); + types_mutex_lock(); assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } - END_TYPE_LOCK(); + types_mutex_unlock(); } // Called when __bases__ is re-assigned. @@ -11306,14 +11475,7 @@ update_all_slots(PyTypeObject* type) pytype_slotdef *p; ASSERT_TYPE_LOCK_HELD(); - -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP - // Similar to update_slot_world_stopped(), this is required to avoid - // races. We don't use update_slot_world_stopped() here because we want - // to stop once rather than once per slot. - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); -#endif + ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. 
*/ type_modified_unlocked(type); @@ -11322,10 +11484,6 @@ update_all_slots(PyTypeObject* type) /* update_slot returns int but can't actually fail */ update_slot(type, p->name_strobj); } - -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP - _PyEval_StartTheWorld(interp); -#endif } @@ -11596,10 +11754,10 @@ PyType_Freeze(PyTypeObject *type) return -1; } - BEGIN_TYPE_LOCK(); + types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); type_modified_unlocked(type); - END_TYPE_LOCK(); + types_start_world(); return 0; } @@ -11664,14 +11822,14 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * PyObject *mro, *res; Py_ssize_t i, n; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); mro = lookup_tp_mro(su_obj_type); /* keep a strong reference to mro because su_obj_type->tp_mro can be replaced during PyDict_GetItemRef(dict, name, &res) and because another thread can modify it after we end the critical section below */ Py_XINCREF(mro); - END_TYPE_LOCK(); + types_mutex_unlock(); if (mro == NULL) return NULL; From ca00e74171ecbfce4b3dbf1b1b6b26f3db1ef776 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 26 Mar 2025 03:10:26 -0700 Subject: [PATCH 09/45] Fix non-debug build. --- Include/internal/pycore_typeobject.h | 1 + Objects/typeobject.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index c6b040ea949311..1a4f89fd2449a0 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -32,6 +32,7 @@ extern "C" { #define _Py_TYPE_BASE_VERSION_TAG (2<<16) #define _Py_MAX_GLOBAL_TYPE_VERSION_TAG (_Py_TYPE_BASE_VERSION_TAG - 1) + /* runtime lifecycle */ extern PyStatus _PyTypes_InitTypes(PyInterpreterState *); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e009394de2bcee..a0c27ac8a184f1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -72,12 +72,10 @@ class object "PyObject *" "&PyBaseObject_Type" // modifying these, in order to avoid data races. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex -#ifdef Py_DEBUG // Used to check for correct use of the TYPE_LOCK mutex. It is a simple // mutex and does not support re-entrancy. If we already hold the lock and // try to acquire it again with the same thread, it is a bug on the code. #define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid -#endif static bool types_world_is_stopped(void) @@ -110,12 +108,14 @@ types_start_world(void) assert(!types_world_is_stopped()); } +#ifdef Py_DEBUG static bool types_mutex_is_owned(void) { PyThread_ident_t tid = PyThread_get_thread_ident_ex(); return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; } +#endif static void types_mutex_set_owned(PyThread_ident_t tid) From 6db454246303204720c25fe5b5784793d184e36d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 26 Mar 2025 12:31:47 -0700 Subject: [PATCH 10/45] Improve comments. --- Objects/typeobject.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a0c27ac8a184f1..2dcc68a75b08e4 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -60,23 +60,32 @@ class object "PyObject *" "&PyBaseObject_Type" #ifdef Py_GIL_DISABLED -// There's a global lock for mutation of types. This avoids having to take +// There's a global lock for types that ensures that the tp_version_tag is +// correctly updated if the type is modified. 
This avoids having to take
// additional locks while doing various subclass processing which may result
// in odd behaviors w.r.t. running with the GIL as the outer type lock could
// be released and reacquired during a subclass update if there's contention
// on the subclass lock.
//
-// While modification of type slots and type flags is protected by the
-// global types lock. However, the slots and flags are read non-atomically
-// without holding the type lock. So, we need to stop-the-world while
-// modifying these, in order to avoid data races.
+// Note that this lock does not protect when updating type slots or the
+// tp_flags member. Instead, we either ensure those updates are done before
+// the type has been revealed to other threads or we only do those updates
+// while the stop-the-world mechanism is active. The slots and flags are read
+// in many places without holding a lock and without atomics.
+//
+// Since TYPE_LOCK is used as a regular mutex, we must take special care about
+// potential re-entrant code paths. We use TYPE_LOCK_TID in debug builds to
+// ensure that we are not trying to re-acquire the mutex when it is already
+// held by the current thread. There are a couple of cases where we release
+// the mutex to call functions that might re-enter.
 #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex
 
-// Used to check for correct use of the TYPE_LOCK mutex. It is a simple
+// Used to check for correct use of the TYPE_LOCK mutex. It is a regular
 // mutex and does not support re-entrancy. If we already hold the lock and
 // try to acquire it again with the same thread, it is a bug in the code.
 #define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid
 
+// Return true if the world is currently stopped.
 static bool
 types_world_is_stopped(void)
 {
@@ -109,6 +118,7 @@ types_start_world(void)
 }
 
 #ifdef Py_DEBUG
+// Return true if the TYPE_LOCK mutex is held by the current thread.
 static bool
 types_mutex_is_owned(void)
 {
@@ -117,6 +127,7 @@ types_mutex_is_owned(void)
 }
 #endif
 
+// Set the TID of the thread currently holding TYPE_LOCK.
 static void
 types_mutex_set_owned(PyThread_ident_t tid)
 {
@@ -3401,6 +3412,7 @@ mro_invoke(PyTypeObject *type)
     if (mro_result == NULL)
         return NULL;
 
+    // FIXME: possible re-entrancy, drop lock?
     new_mro = PySequence_Tuple(mro_result);
     Py_DECREF(mro_result);
     if (new_mro == NULL) {
@@ -4576,6 +4588,7 @@ type_new_impl(type_new_ctx *ctx)
     }
 
     assert(_PyType_CheckConsistency(type));
+    // After this point, other threads can potentially use this type.
     type->tp_flags |= Py_TPFLAGS_EXPOSED;
 
     return (PyObject *)type;
 
@@ -5290,6 +5303,7 @@ PyType_FromMetaclass(
     }
 
     assert(_PyType_CheckConsistency(type));
+    // After this point, other threads can potentially use this type.
     type->tp_flags |= Py_TPFLAGS_EXPOSED;
 
 finally:

From 75d6b71a0169539c1a798c425c053127b35b046c Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Wed, 26 Mar 2025 23:50:39 -0700
Subject: [PATCH 11/45] Avoid unused function warning.
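type_assign_version() is now only referenced from code compiled under
Py_GIL_DISABLED, so the default build sees a static function with no
callers and warns with -Wunused-function. Guard the definition with the
same condition as its callers. A minimal sketch of the pattern (the
function below is a hypothetical stand-in, not the real helper):

    #ifdef Py_GIL_DISABLED
    /* Compiled only when its sole callers exist; without this guard the
       default build emits -Wunused-function for the unused definition. */
    static unsigned int
    assign_version_sketch(PyTypeObject *type)
    {
        return type->tp_version_tag;
    }
    #endif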
--- Objects/typeobject.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b7311e7f224dc0..7c9b7712e174bd 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5821,6 +5821,7 @@ type_assign_version_lock_held(PyTypeObject *type) } } +#ifdef Py_GIL_DISABLED static unsigned int type_assign_version(PyTypeObject *type) { @@ -5830,6 +5831,7 @@ type_assign_version(PyTypeObject *type) types_mutex_unlock(); return version; } +#endif static unsigned int type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, From 895a86a6a85a30892500d42f6d84a29a94afd363 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 31 Mar 2025 14:05:11 -0700 Subject: [PATCH 12/45] Remove unwanted suppression (bad merge). --- Tools/tsan/suppressions_free_threading.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 40b4bc933bc058..771a7997143a0f 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -13,7 +13,6 @@ # These warnings trigger directly in a CPython function. race_top:assign_version_tag -race_top:_multiprocessing_SemLock_acquire_impl race_top:dump_traceback race_top:fatal_error race_top:_PyFrame_GetCode From 65e40f4856b460e2212ac4f7b6978371e5222f22 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 31 Mar 2025 14:37:14 -0700 Subject: [PATCH 13/45] Fixes based on review feedback. * In type_set_bases(), always use stop-the-world. * Fixes for mro_invoke(). We need to do different things depending on if there is a custom mro() method and where we are called from. If from type_ready(), need the TYPE_LOCK mutex. If from assignment to __bases__, need stop-the-world. * Remove some places TYPE_LOCK is acquired when it no longer needs to be. * Add comments to some _lock_held() functions to note that they might be called with the world stopped, rather than holding the lock. * Remove update_slot_world_stopped() by inlining it. --- Objects/typeobject.c | 106 ++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7c9b7712e174bd..9f3c104240bb70 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -156,7 +156,7 @@ types_mutex_unlock(void) } #define ASSERT_TYPE_LOCK_HELD() \ - {if (!types_world_is_stopped()) assert(types_mutex_is_owned());} + assert(types_world_is_stopped() || types_mutex_is_owned()) #else @@ -432,7 +432,8 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) if (tp->tp_flags & Py_TPFLAGS_READY) { // It's possible the type object has been exposed to other threads // if it's been marked ready. In that case, the type lock should be - // held when flags are modified. + // held when flags are modified (or the stop-the-world mechanism should + // be active). ASSERT_TYPE_LOCK_HELD(); } ASSERT_NEW_OR_STOPPED(tp); @@ -1923,18 +1924,9 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); int res; - bool need_stop = !types_world_is_stopped(); - if (need_stop) { - // This function can be re-entered. We want to avoid trying to stop - // the world a second time if that happens. 
Example call chain:
-        // mro_invoke() -> call_unbound_noarg() -> type_setattro() ->
-        // type_set_bases()
-        types_stop_world();
-    }
+    types_stop_world();
     res = type_set_bases_unlocked(type, new_bases);
-    if (need_stop) {
-        types_start_world();
-    }
+    types_start_world();
     return res;
 }
 
@@ -3487,16 +3479,48 @@ mro_invoke(PyTypeObject *type)
     const int custom = !Py_IS_TYPE(type, &PyType_Type);
 
     if (custom) {
+        // Custom mro() method on metaclass. This is potentially
+        // re-entrant. We are called either from type_ready(), in which case
+        // the TYPE_LOCK mutex is held, or from type_set_bases() (assignment to
+        // __bases__), in which case we need to stop-the-world in order to avoid
+        // data races.
+        bool stopped = types_world_is_stopped();
+        if (stopped) {
+            // Called for type_set_bases(), re-assignment of __bases__
+            types_start_world();
+        }
+        else {
+            // Called from type_ready()
+            ASSERT_TYPE_LOCK_HELD();
+            types_mutex_unlock();
+        }
         mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro));
+        if (mro_result != NULL) {
+            new_mro = PySequence_Tuple(mro_result);
+            Py_DECREF(mro_result);
+        }
+        else {
+            new_mro = NULL;
+        }
+        if (stopped) {
+            types_stop_world();
+        }
+        else {
+            types_mutex_lock();
+        }
     }
     else {
+        // In this case, the mro() method on the type object is being used and
+        // we know that these calls are not re-entrant.
         mro_result = mro_implementation(type);
+        if (mro_result != NULL) {
+            new_mro = PySequence_Tuple(mro_result);
+            Py_DECREF(mro_result);
+        }
+        else {
+            new_mro = NULL;
+        }
     }
-    if (mro_result == NULL)
-        return NULL;
-
-    new_mro = PySequence_Tuple(mro_result);
-    Py_DECREF(mro_result);
     if (new_mro == NULL) {
         return NULL;
     }
@@ -3549,9 +3573,7 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro)
        Don't let old_mro be GC'ed and its address be reused for
        another object, like (suddenly!) a new tp_mro. */
     old_mro = Py_XNewRef(lookup_tp_mro(type));
-    types_mutex_unlock();
     new_mro = mro_invoke(type);  /* might cause reentrance */
-    types_mutex_lock();
     reent = (lookup_tp_mro(type) != old_mro);
     Py_XDECREF(old_mro);
     if (new_mro == NULL) {
@@ -5523,7 +5545,6 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
     }
 
     PyObject *res = NULL;
-    types_mutex_lock();
 
     PyObject *mro = lookup_tp_mro(type);
     // The type must be ready
@@ -5550,7 +5571,6 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
             break;
         }
     }
-    types_mutex_unlock();
 
     if (res == NULL) {
         PyErr_Format(
@@ -5806,12 +5826,14 @@ _PyTypes_AfterFork(void)
 #endif
 }
 
-// Try to assign a new type version tag, return it if
-// successful. Return 0 if no version was assigned.
+// Try to assign a new type version tag, return it if successful. Return 0
+// if no version was assigned. Note that this function can be called while
+// holding the TYPE_LOCK mutex or when the stop-the-world mechanism is active.
 static unsigned int
 type_assign_version_lock_held(PyTypeObject *type)
 {
     ASSERT_TYPE_LOCK_HELD();
+    assert(!types_world_is_stopped());
     PyInterpreterState *interp = _PyInterpreterState_GET();
     if (assign_version_tag(interp, type)) {
         return type->tp_version_tag;
@@ -5916,6 +5938,8 @@ type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out,
     return assigned_version;
 }
 
+// Note that this function can be called while holding the TYPE_LOCK mutex or
+// when the stop-the-world mechanism is active.
static unsigned int
 type_lookup_lock_held(PyTypeObject *type, PyObject *name, _PyStackRef *out)
 {
@@ -6019,7 +6043,6 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor
 void
 _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags)
 {
-    types_mutex_lock();
     unsigned long new_flags = (self->tp_flags & ~mask) | flags;
     if (new_flags != self->tp_flags) {
         types_stop_world();
@@ -6027,7 +6050,6 @@ _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags)
         self->tp_flags = (self->tp_flags & ~mask) | flags;
         types_start_world();
     }
-    types_mutex_unlock();
 }
 
 int
@@ -6190,18 +6212,6 @@ _Py_type_getattro(PyObject *tp, PyObject *name)
     return _Py_type_getattro_impl(type, name, NULL);
 }
 
-#ifdef Py_GIL_DISABLED
-static int
-update_slot_world_stopped(PyTypeObject *type, PyObject *name)
-{
-    int ret;
-    types_stop_world();
-    ret = update_slot(type, name);
-    types_start_world();
-    return ret;
-}
-#endif
-
 // Called by type_setattro(). Updates both the type dict and
 // any type slots that correspond to the modified entry.
 static int
@@ -6241,7 +6251,11 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name,
 #ifdef Py_GIL_DISABLED
     if (is_dunder_name(name) && has_slotdef(name)) {
         // update is potentially changing one or more slots
-        return update_slot_world_stopped(type, name);
+        int ret;
+        types_stop_world();
+        ret = update_slot(type, name);
+        types_start_world();
+        return ret;
     }
 #else
     if (is_dunder_name(name)) {
@@ -6319,7 +6333,7 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
         }
     }
 
-    // FIXME: can this deadlock? if so, how to avoid
+    // FIXME: I'm not totally sure this is safe from a deadlock. If so, how to avoid?
     types_mutex_lock();
     Py_BEGIN_CRITICAL_SECTION(dict);
     res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value);
@@ -7254,14 +7268,10 @@ object_set_class(PyObject *self, PyObject *value, void *closure)
         return -1;
     }
 
-#ifdef Py_GIL_DISABLED
     types_stop_world();
-#endif
     PyTypeObject *oldto = Py_TYPE(self);
     int res = object_set_class_world_stopped(self, newto);
-#ifdef Py_GIL_DISABLED
     types_start_world();
-#endif
     if (res == 0) {
         if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) {
             Py_DECREF(oldto);
@@ -10736,9 +10746,7 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer)
 {
     releasebufferproc base_releasebuffer;
 
-    types_mutex_lock();
     base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer);
-    types_mutex_unlock();
 
     if (base_releasebuffer != NULL) {
         base_releasebuffer(self, buffer);
@@ -11806,14 +11814,8 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
     PyObject *mro, *res;
     Py_ssize_t i, n;
 
-    types_mutex_lock();
     mro = lookup_tp_mro(su_obj_type);
-    /* keep a strong reference to mro because su_obj_type->tp_mro can be
-       replaced during PyDict_GetItemRef(dict, name, &res) and because
-       another thread can modify it after we end the critical section
-       below */
     Py_XINCREF(mro);
-    types_mutex_unlock();
 
     if (mro == NULL)
         return NULL;

From b68f1a13e306791bf79891a414dfd2a0b3a7ac5c Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Mon, 31 Mar 2025 14:51:06 -0700
Subject: [PATCH 14/45] Remove spurious assert().
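The assert was added to type_assign_version_lock_held() in the previous
commit, but it contradicts both the function's own comment (it may be
called while the stop-the-world mechanism is active) and the definition
of ASSERT_TYPE_LOCK_HELD() from the same commit:

    #define ASSERT_TYPE_LOCK_HELD() \
        assert(types_world_is_stopped() || types_mutex_is_owned())

    ASSERT_TYPE_LOCK_HELD();              /* world stopped OR mutex owned */
    assert(!types_world_is_stopped());    /* ...but never world stopped */

Together these only pass when the mutex is held with the world running,
so the extra assert can fire on legitimate call paths and is removed.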
--- Objects/typeobject.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 9f3c104240bb70..9905da23277923 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5833,7 +5833,6 @@ static unsigned int type_assign_version_lock_held(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); - assert(!types_world_is_stopped()); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { return type->tp_version_tag; From 1b844867741c2d831fa6fd5d90dcaada0070c8ff Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 1 Apr 2025 10:40:14 -0700 Subject: [PATCH 15/45] Omit mutex_tid member from default build. --- Include/internal/pycore_interp_structs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 96355d52fa92f1..bb2967c39b878f 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -566,8 +566,10 @@ struct types_state { managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; } for_extensions; PyMutex mutex; +#ifdef Py_GIL_DISABLED // used to check correct usage of the above mutex unsigned long long mutex_tid; +#endif // Borrowed references to type objects whose // tp_version_tag % TYPE_VERSION_CACHE_SIZE From 2af9e4954d8cf58481203e21ce4185d1293209f4 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 1 Apr 2025 10:40:35 -0700 Subject: [PATCH 16/45] Remove Py_TPFLAGS_EXPOSED flag and related logic. This was useful for debugging logic but we don't want to use an additional type flag. --- Include/object.h | 4 ++-- Objects/typeobject.c | 25 ------------------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/Include/object.h b/Include/object.h index 82b7ede695d819..d23918c2da67f9 100644 --- a/Include/object.h +++ b/Include/object.h @@ -576,8 +576,8 @@ given type object has a specified feature. /* Objects behave like an unbound method */ #define Py_TPFLAGS_METHOD_DESCRIPTOR (1UL << 17) -/* Type structure is potentially exposed (revealed) to other threads */ -#define Py_TPFLAGS_EXPOSED (1UL << 19) +/* Unused. Legacy flag */ +#define Py_TPFLAGS_VALID_VERSION_TAG (1UL << 19) /* Type is abstract and cannot be instantiated */ #define Py_TPFLAGS_IS_ABSTRACT (1UL << 20) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 9905da23277923..96d44484a31411 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -93,12 +93,6 @@ types_world_is_stopped(void) return interp->stoptheworld.world_stopped; } -// Checks that either the type has not yet been exposed (revealed) to other -// threads (it was newly created and the Py_TPFLAGS_EXPOSED flag is not set -// yet) or the world is stopped and we can safely update it without races. -#define ASSERT_NEW_OR_STOPPED(tp) \ - assert((((tp)->tp_flags & Py_TPFLAGS_EXPOSED) == 0) || types_world_is_stopped()) - static void types_stop_world(void) { @@ -166,7 +160,6 @@ types_mutex_unlock(void) #define types_stop_world() #define types_start_world() #define ASSERT_TYPE_LOCK_HELD() -#define ASSERT_NEW_OR_STOPPED(tp) #endif @@ -436,7 +429,6 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // be active). 
ASSERT_TYPE_LOCK_HELD(); } - ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -444,7 +436,6 @@ static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -580,7 +571,6 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); - ASSERT_NEW_OR_STOPPED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -638,7 +628,6 @@ _PyType_GetMRO(PyTypeObject *self) static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { - ASSERT_NEW_OR_STOPPED(self); if (mro != NULL) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -1717,7 +1706,6 @@ static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; int res = mro_internal(type, &old_mro); @@ -3564,7 +3552,6 @@ static int mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; int reent; @@ -3614,7 +3601,6 @@ static int mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; - ASSERT_NEW_OR_STOPPED(type); res = mro_internal_unlocked(type, 0, p_old_mro); return res; } @@ -4694,8 +4680,6 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); - // After this point, other threads can potentally use this type. - type->tp_flags |= Py_TPFLAGS_EXPOSED; return (PyObject *)type; @@ -5409,8 +5393,6 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); - // After this point, other threads can potentally use this type. - type->tp_flags |= Py_TPFLAGS_EXPOSED; finally: if (PyErr_Occurred()) { @@ -8668,7 +8650,6 @@ static int type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!initial) { @@ -8744,7 +8725,6 @@ static int type_ready_inherit(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; @@ -8942,7 +8922,6 @@ static int type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -11275,7 +11254,6 @@ static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11392,7 +11370,6 @@ static int update_slots_callback(PyTypeObject *type, void *data) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { @@ -11411,7 +11388,6 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11466,7 +11442,6 @@ update_all_slots(PyTypeObject* type) pytype_slotdef *p; ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. 
*/
     type_modified_unlocked(type);

From f65e87c3a787c5d270499b3ec6d8e2b5b0d58a20 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Tue, 1 Apr 2025 10:47:07 -0700
Subject: [PATCH 17/45] Improve comment for TYPE_LOCK.

---
 Objects/typeobject.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 96d44484a31411..53afcd7e548488 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -60,29 +60,28 @@ class object "PyObject *" "&PyBaseObject_Type"
 
 #ifdef Py_GIL_DISABLED
 
-// There's a global lock for types that ensures that the tp_version_tag is
-// correctly updated if the type is modified. This avoids having to take
-// additional locks while doing various subclass processing which may result
-// in odd behaviors w.r.t. running with the GIL as the outer type lock could
-// be released and reacquired during a subclass update if there's contention
-// on the subclass lock.
+// There's a global lock for types that ensures that tp_version_tag and
+// _spec_cache are correctly updated if the type is modified. This avoids
+// having to take additional locks while doing various subclass processing
+// which may result in odd behaviors w.r.t. running with the GIL as the outer
+// type lock could be released and reacquired during a subclass update if
+// there's contention on the subclass lock.
 //
 // Note that this lock does not protect when updating type slots or the
 // tp_flags member. Instead, we either ensure those updates are done before
 // the type has been revealed to other threads or we only do those updates
 // while the stop-the-world mechanism is active. The slots and flags are read
 // in many places without holding a lock and without atomics.
-//
+#define TYPE_LOCK &PyInterpreterState_Get()->types.mutex
+
 // Since TYPE_LOCK is used as a regular mutex, we must take special care about
 // potential re-entrant code paths. We use TYPE_LOCK_TID in debug builds to
 // ensure that we are not trying to re-acquire the mutex when it is already
 // held by the current thread. There are a couple of cases where we release
-// the mutex to call functions that might re-enter.
-#define TYPE_LOCK &PyInterpreterState_Get()->types.mutex
-
-// Used to check for correct use of the TYPE_LOCK mutex. It is a regular
-// mutex and does not support re-entrancy. If we already hold the lock and
-// try to acquire it again with the same thread, it is a bug in the code.
+// the mutex to call functions that might re-enter. If we already hold the
+// lock and try to acquire it again with the same thread, it is a bug in the
+// code. We could just let it deadlock but having an assert failure gives
+// a more explicit error.
 #define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid
 
 // Return true if the world is currently stopped.

From 395a6d3de63dbd9e98d77f17acbb7b3169864d98 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Tue, 1 Apr 2025 12:35:51 -0700
Subject: [PATCH 18/45] Re-add the ASSERT_NEW_OR_STOPPED() asserts.

These seem like helpful asserts that could catch future bugs, e.g. if the
typeobject.c code is refactored or significantly changed. So, I think it
makes sense to allocate an ob_flags bit for this purpose. We can remove it
later if desired since it's non-public.

Move the ob_flags bit values into the object.h header file.
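In short, the mechanism looks like this (a sketch; the real definitions
are in the diff below):

    /* Set in type_new_impl() and PyType_FromMetaclass() once the new
       type may be visible to other threads. */
    ((PyObject *)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG;

    /* Checked when updating type slots or tp_flags: either the type is
       still private to the creating thread, or the world is stopped. */
    #define ASSERT_NEW_OR_STOPPED(tp) \
        assert((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) == 0 \
               || types_world_is_stopped())

Since ob_flags only has spare bits when SIZEOF_VOID_P > 4, the assert is
only enabled on 64-bit debug builds of the free-threaded interpreter.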
--- Include/object.h | 6 ++++++ Include/refcount.h | 3 --- Objects/typeobject.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/Include/object.h b/Include/object.h index d23918c2da67f9..994cac1ad17501 100644 --- a/Include/object.h +++ b/Include/object.h @@ -620,6 +620,12 @@ given type object has a specified feature. #define Py_TPFLAGS_HAVE_FINALIZE (1UL << 0) #define Py_TPFLAGS_HAVE_VERSION_TAG (1UL << 18) +// Flag values for ob_flags (16 bits available, if SIZEOF_VOID_P > 4). +#define _Py_IMMORTAL_FLAGS (1 << 0) +#define _Py_STATICALLY_ALLOCATED_FLAG (1 << 2) +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) +#define _Py_TYPE_REVEALED_FLAG (1 << 3) +#endif #define Py_CONSTANT_NONE 0 #define Py_CONSTANT_FALSE 1 diff --git a/Include/refcount.h b/Include/refcount.h index 177bbdaf0c5977..ebd1dba6d15e1a 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -19,9 +19,6 @@ immortal. The latter should be the only instances that require cleanup during runtime finalization. */ -#define _Py_STATICALLY_ALLOCATED_FLAG 4 -#define _Py_IMMORTAL_FLAGS 1 - #if SIZEOF_VOID_P > 4 /* In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 53afcd7e548488..073e919c715d17 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -92,6 +92,19 @@ types_world_is_stopped(void) return interp->stoptheworld.world_stopped; } +// Checks that either the type has not yet been revealed (exposed) to +// other threads or the world is stopped and we can safely update it +// without races. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and +// PyType_FromMetaclass() to indicate that a newly initialized type might +// be revealed. +#if SIZEOF_VOID_P > 4 +#define ASSERT_NEW_OR_STOPPED(tp) \ + assert((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) == 0 || \ + types_world_is_stopped()) +#else +#define ASSERT_NEW_OR_STOPPED(tp) +#endif + static void types_stop_world(void) { @@ -159,6 +172,7 @@ types_mutex_unlock(void) #define types_stop_world() #define types_start_world() #define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_NEW_OR_STOPPED(tp) #endif @@ -428,6 +442,7 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // be active). ASSERT_TYPE_LOCK_HELD(); } + ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -435,6 +450,7 @@ static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -570,6 +586,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); + ASSERT_NEW_OR_STOPPED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. 
@@ -627,6 +644,7 @@ _PyType_GetMRO(PyTypeObject *self)
 static inline void
 set_tp_mro(PyTypeObject *self, PyObject *mro, int initial)
 {
+    ASSERT_NEW_OR_STOPPED(self);
     if (mro != NULL) {
         assert(PyTuple_CheckExact(mro));
         if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
@@ -1705,6 +1723,7 @@ static int
 mro_hierarchy(PyTypeObject *type, PyObject *temp)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     PyObject *old_mro;
     int res = mro_internal(type, &old_mro);
@@ -3551,6 +3570,7 @@ static int
 mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     PyObject *new_mro, *old_mro;
     int reent;
@@ -3600,6 +3620,7 @@ static int
 mro_internal(PyTypeObject *type, PyObject **p_old_mro)
 {
     int res;
+    ASSERT_NEW_OR_STOPPED(type);
     res = mro_internal_unlocked(type, 0, p_old_mro);
     return res;
 }
@@ -4679,6 +4700,10 @@ type_new_impl(type_new_ctx *ctx)
     }
 
     assert(_PyType_CheckConsistency(type));
+#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4
+    // After this point, other threads can potentially use this type.
+    ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG;
+#endif
 
     return (PyObject *)type;
 
@@ -5392,6 +5417,10 @@ PyType_FromMetaclass(
     }
 
     assert(_PyType_CheckConsistency(type));
+#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4
+    // After this point, other threads can potentially use this type.
+    ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG;
+#endif
 
 finally:
     if (PyErr_Occurred()) {
@@ -8649,6 +8678,7 @@ static int
 type_ready_mro(PyTypeObject *type, int initial)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
         if (!initial) {
@@ -8724,6 +8754,7 @@ static int
 type_ready_inherit(PyTypeObject *type)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     /* Inherit special flags from dominant base */
     PyTypeObject *base = type->tp_base;
@@ -8921,6 +8952,7 @@ static int
 type_ready(PyTypeObject *type, int initial)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     _PyObject_ASSERT((PyObject *)type, !is_readying(type));
     start_readying(type);
@@ -11253,6 +11285,7 @@ static pytype_slotdef *
 update_one_slot(PyTypeObject *type, pytype_slotdef *p)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     PyObject *descr;
     PyWrapperDescrObject *d;
@@ -11369,6 +11402,7 @@ static int
 update_slots_callback(PyTypeObject *type, void *data)
 {
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     pytype_slotdef **pp = (pytype_slotdef **)data;
     for (; *pp; pp++) {
@@ -11387,6 +11421,7 @@ update_slot(PyTypeObject *type, PyObject *name)
     int offset;
 
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
     assert(types_world_is_stopped());
     assert(PyUnicode_CheckExact(name));
     assert(PyUnicode_CHECK_INTERNED(name));
@@ -11441,6 +11476,7 @@ update_all_slots(PyTypeObject* type)
 {
     pytype_slotdef *p;
 
     ASSERT_TYPE_LOCK_HELD();
+    ASSERT_NEW_OR_STOPPED(type);
 
     /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */
     type_modified_unlocked(type);

From e4f87e57289e5572c20a22285f8f128023667189 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Thu, 3 Apr 2025 11:36:10 -0700
Subject: [PATCH 19/45] Further cleanups of the locking in typeobject.

Reduce the number of places and the regions where TYPE_LOCK is being
held. Most significantly, don't take the lock inside type_ready(). This
removes the need for some _lock_held variants of some functions. We
don't need type_lookup_lock_held() anymore, for example.
Rename type_set_bases_unlocked() to type_set_bases_world_stopped(). --- Objects/typeobject.c | 206 +++++++++++++------------------------------ 1 file changed, 62 insertions(+), 144 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 073e919c715d17..61f97e99849d19 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -92,19 +92,20 @@ types_world_is_stopped(void) return interp->stoptheworld.world_stopped; } -// Checks that either the type has not yet been revealed (exposed) to -// other threads or the world is stopped and we can safely update it -// without races. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and -// PyType_FromMetaclass() to indicate that a newly initialized type might -// be revealed. +// Checks that the type has not yet been revealed (exposed) to other +// threads. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and +// PyType_FromMetaclass() to indicate that a newly initialized type might be +// revealed. We only have ob_flags on 64-bit platforms. #if SIZEOF_VOID_P > 4 -#define ASSERT_NEW_OR_STOPPED(tp) \ - assert((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) == 0 || \ - types_world_is_stopped()) +#define TYPE_IS_REVEALED(tp) ((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) != 0) #else -#define ASSERT_NEW_OR_STOPPED(tp) +#define TYPE_IS_REVEALED(tp) 0 #endif +// Checks if we can safely update type slots or tp_flags. +#define ASSERT_NEW_OR_STOPPED(tp) \ + assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) + static void types_stop_world(void) { @@ -435,13 +436,6 @@ _PyStaticType_GetBuiltins(void) static void type_set_flags(PyTypeObject *tp, unsigned long flags) { - if (tp->tp_flags & Py_TPFLAGS_READY) { - // It's possible the type object has been exposed to other threads - // if it's been marked ready. In that case, the type lock should be - // held when flags are modified (or the stop-the-world mechanism should - // be active). - ASSERT_TYPE_LOCK_HELD(); - } ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -449,7 +443,6 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); @@ -1214,9 +1207,6 @@ PyType_Modified(PyTypeObject *type) types_mutex_unlock(); } -static unsigned int type_lookup_lock_held(PyTypeObject *type, PyObject *name, - _PyStackRef *out); - static int is_subtype_with_mro(PyObject *a_mro, PyTypeObject *a, PyTypeObject *b); @@ -1230,8 +1220,8 @@ has_custom_mro(PyTypeObject *tp) _PyThreadState_PushCStackRef(tstate, &c_ref1); _PyThreadState_PushCStackRef(tstate, &c_ref2); - type_lookup_lock_held(Py_TYPE(tp), &_Py_ID(mro), &c_ref1.ref); - type_lookup_lock_held(&PyType_Type, &_Py_ID(mro), &c_ref2.ref); + _PyType_LookupStackRefAndVersion(Py_TYPE(tp), &_Py_ID(mro), &c_ref1.ref); + _PyType_LookupStackRefAndVersion(&PyType_Type, &_Py_ID(mro), &c_ref2.ref); int custom = !PyStackRef_Is(c_ref1.ref, c_ref2.ref); @@ -1255,14 +1245,24 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. 
*/ - Py_ssize_t i, n; +#ifdef Py_GIL_DISABLED + unsigned int meta_version = FT_ATOMIC_LOAD_UINT32_RELAXED(Py_TYPE(type)->tp_version_tag); +#endif + bool is_custom = !Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type); - ASSERT_TYPE_LOCK_HELD(); - if (!Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type)) { + types_mutex_lock(); + if (is_custom) { goto clear; } - n = PyTuple_GET_SIZE(bases); - for (i = 0; i < n; i++) { +#ifdef Py_GIL_DISABLED + if (meta_version != Py_TYPE(type)->tp_version_tag) { + // Raced with another thread mutating the metaclass, possible that mro() + // method was assigned. + goto clear; + } +#endif + Py_ssize_t n = PyTuple_GET_SIZE(bases); + for (Py_ssize_t i = 0; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(bases, i); PyTypeObject *cls = _PyType_CAST(b); @@ -1274,7 +1274,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) goto clear; } } - return; + goto done; clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); @@ -1286,6 +1286,9 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) FT_ATOMIC_STORE_PTR_RELAXED( ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); } + +done: + types_mutex_unlock(); } /* @@ -1704,7 +1707,7 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) } static PyTypeObject *best_base(PyObject *); -static int mro_internal(PyTypeObject *, PyObject **); +static int mro_internal(PyTypeObject *, int, PyObject **); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); @@ -1722,11 +1725,10 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; - int res = mro_internal(type, &old_mro); + int res = mro_internal(type, 0, &old_mro); if (res <= 0) { /* error / reentrance */ return res; @@ -1788,7 +1790,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) +type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) { // Check arguments if (!check_set_special_type_attr(type, new_bases, "__bases__")) { @@ -1931,7 +1933,7 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) PyTypeObject *type = PyTypeObject_CAST(tp); int res; types_stop_world(); - res = type_set_bases_unlocked(type, new_bases); + res = type_set_bases_world_stopped(type, new_bases); types_start_world(); return res; } @@ -2886,18 +2888,9 @@ _PyObject_LookupSpecialMethod(PyObject *self, PyObject *attr, PyObject **self_or static int lookup_method_ex(PyObject *self, PyObject *attr, _PyStackRef *out, - int raise_attribute_error, int lock_held) + int raise_attribute_error) { -#ifdef Py_GIL_DISABLED - if (lock_held) { - type_lookup_lock_held(Py_TYPE(self), attr, out); - } - else { - _PyType_LookupStackRefAndVersion(Py_TYPE(self), attr, out); - } -#else _PyType_LookupStackRefAndVersion(Py_TYPE(self), attr, out); -#endif if (PyStackRef_IsNull(*out)) { if (raise_attribute_error) { PyErr_SetObject(PyExc_AttributeError, attr); @@ -2931,13 +2924,13 @@ lookup_method_ex(PyObject *self, PyObject *attr, _PyStackRef *out, static int lookup_maybe_method(PyObject *self, PyObject *attr, _PyStackRef *out) { - return lookup_method_ex(self, attr, out, 0, 0); + return lookup_method_ex(self, attr, out, 0); } static int lookup_method(PyObject *self, PyObject *attr, 
_PyStackRef *out) { - return lookup_method_ex(self, attr, out, 1, 0); + return lookup_method_ex(self, attr, out, 1); } @@ -3495,11 +3488,6 @@ mro_invoke(PyTypeObject *type) // Called for type_set_bases(), re-assigment of __bases__ types_start_world(); } - else { - // Called from type_ready() - ASSERT_TYPE_LOCK_HELD(); - types_mutex_unlock(); - } mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro)); if (mro_result != NULL) { new_mro = PySequence_Tuple(mro_result); @@ -3511,9 +3499,6 @@ mro_invoke(PyTypeObject *type) if (stopped) { types_stop_world(); } - else { - types_mutex_lock(); - } } else { // In this case, the mro() method on the type object is being used and @@ -3567,9 +3552,8 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; @@ -3600,7 +3584,9 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { + types_mutex_lock(); type_modified_unlocked(type); + types_mutex_unlock(); } else { /* For static builtin types, this is only called during init @@ -3616,15 +3602,6 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) return 1; } -static int -mro_internal(PyTypeObject *type, PyObject **p_old_mro) -{ - int res; - ASSERT_NEW_OR_STOPPED(type); - res = mro_internal_unlocked(type, 0, p_old_mro); - return res; -} - /* Calculate the best base amongst multiple base classes. This is the first one that's on the path to the "solid base". */ @@ -5837,36 +5814,25 @@ _PyTypes_AfterFork(void) } // Try to assign a new type version tag, return it if successful. Return 0 -// if no version was assigned. Note that this function can be called while -// holding the TYPE_LOCK mutex or when the stop-the-world mechanism is active. +// if no version was assigned. static unsigned int -type_assign_version_lock_held(PyTypeObject *type) +type_assign_version(PyTypeObject *type) { - ASSERT_TYPE_LOCK_HELD(); + unsigned int version; + types_mutex_lock(); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { - return type->tp_version_tag; + version = type->tp_version_tag; } else { - return 0; + version = 0; } -} - -#ifdef Py_GIL_DISABLED -static unsigned int -type_assign_version(PyTypeObject *type) -{ - unsigned int version; - types_mutex_lock(); - version = type_assign_version_lock_held(type); types_mutex_unlock(); return version; } -#endif -static unsigned int -type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, - unsigned int (*assign_version)(PyTypeObject *)) +unsigned int +_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out) { unsigned int h = MCACHE_HASH_METHOD(type, name); struct type_cache *cache = get_type_cache(); @@ -5916,7 +5882,7 @@ type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, PyObject *res; unsigned int assigned_version = 0; // 0 is not a valid version if (MCACHE_CACHEABLE_NAME(name)) { - assigned_version = assign_version(type); + assigned_version = type_assign_version(type); } // Calling find_name_in_mro() might cause the type version to change. For // example, if a __hash__ or __eq__ method mutates the types. 
Since this @@ -5947,25 +5913,6 @@ type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, return assigned_version; } -// Note that this function can be called while holding the TYPE_LOCK mutex or -// when the stop-the-world mechanism is active. -static unsigned int -type_lookup_lock_held(PyTypeObject *type, PyObject *name, _PyStackRef *out) -{ - return type_lookup_ex(type, name, out, &type_assign_version_lock_held); -} - -unsigned int -_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out) -{ -#ifdef Py_GIL_DISABLED - return type_lookup_ex(type, name, out, &type_assign_version); -#else - return type_lookup_ex(type, name, out, &type_assign_version_lock_held); -#endif - -} - /* Internal API to look for a name through the MRO. This returns a strong reference, and doesn't set an exception! If nonzero, version is set to the value of type->tp_version at the time of @@ -6330,12 +6277,15 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) PyObject *dict = type->tp_dict; if (dict == NULL) { // We don't just do PyType_Ready because we could already be readying - types_mutex_lock(); dict = type->tp_dict; if (dict == NULL) { + // FIXME: this can race with other threads. However, it is rare + // that PyType_Ready() is not called before the type is used from + // multiple threads. To do this safely, we have to ensure the + // type is not revealed to other threads or we stop-the-world before + // doing this assignment. dict = type->tp_dict = PyDict_New(); } - types_mutex_unlock(); if (dict == NULL) { res = -1; goto done; @@ -8677,7 +8627,6 @@ type_ready_preheader(PyTypeObject *type) static int type_ready_mro(PyTypeObject *type, int initial) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -8689,7 +8638,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal_unlocked(type, initial, NULL) < 0) { + if (mro_internal(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -8753,7 +8702,6 @@ inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) { static int type_ready_inherit(PyTypeObject *type) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ @@ -8951,7 +8899,6 @@ type_ready_post_checks(PyTypeObject *type) static int type_ready(PyTypeObject *type, int initial) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); @@ -9044,14 +8991,12 @@ PyType_Ready(PyTypeObject *type) } int res; - types_mutex_lock(); if (!(type->tp_flags & Py_TPFLAGS_READY)) { res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); } - types_mutex_unlock(); return res; } @@ -9085,9 +9030,7 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, managed_static_type_state_init(interp, self, isbuiltin, initial); int res; - types_mutex_lock(); res = type_ready(self, initial); - types_mutex_unlock(); if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); managed_static_type_state_clear(interp, self, isbuiltin, initial); @@ -9538,12 +9481,13 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped) https://mail.python.org/pipermail/python-dev/2003-April/034535.html */ static int -hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what) +hackcheck(PyObject *self, setattrofunc func, const char *what) { + if (!PyType_Check(self)) { + return 1; + } PyTypeObject *type 
= Py_TYPE(self); - ASSERT_TYPE_LOCK_HELD(); - PyObject *mro = lookup_tp_mro(type); if (!mro) { /* Probably ok not to check the call in this case. */ @@ -9585,20 +9529,6 @@ hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what) return 1; } -static int -hackcheck(PyObject *self, setattrofunc func, const char *what) -{ - if (!PyType_Check(self)) { - return 1; - } - - int res; - types_mutex_lock(); - res = hackcheck_unlocked(self, func, what); - types_mutex_unlock(); - return res; -} - static PyObject * wrap_setattr(PyObject *self, PyObject *args, void *wrapped) { @@ -10713,7 +10643,7 @@ slot_bf_getbuffer(PyObject *self, Py_buffer *buffer, int flags) } static releasebufferproc -releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer) +type_lookup_base_releasebuffer(PyObject *self, Py_buffer *buffer) { PyTypeObject *self_type = Py_TYPE(self); PyObject *mro = lookup_tp_mro(self_type); @@ -10755,7 +10685,7 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) { releasebufferproc base_releasebuffer; - base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); + base_releasebuffer = type_lookup_base_releasebuffer(self, buffer); if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); @@ -11284,7 +11214,6 @@ has_slotdef(PyObject *name) static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); PyObject *descr; @@ -11401,7 +11330,6 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) static int update_slots_callback(PyTypeObject *type, void *data) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; @@ -11420,8 +11348,6 @@ update_slot(PyTypeObject *type, PyObject *name) pytype_slotdef **pp; int offset; - ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11456,17 +11382,10 @@ update_slot(PyTypeObject *type, PyObject *name) static void fixup_slot_dispatchers(PyTypeObject *type) { - // This lock isn't strictly necessary because the type has not been - // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro - // where we'd like to assert that the type is locked. - types_mutex_lock(); - assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } - - types_mutex_unlock(); } // Called when __bases__ is re-assigned. @@ -11475,7 +11394,6 @@ update_all_slots(PyTypeObject* type) { pytype_slotdef *p; - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ From 2a6655564533916aae0ec57e4564ee4b8b2c8820 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Apr 2025 11:41:46 -0700 Subject: [PATCH 20/45] Fix data race in resolve_slotdups(). The type_slots_pname and type_slots_ptrs are interpreter globals. Rather than trying to make access safe, just disable those caches in the free-threaded build. This cache is of dubious value anyhow. 
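The race: two threads calling resolve_slotdups() with different names can
interleave their writes to the shared pname/ptrs cache, so one thread may
read a ptrs array that was filled in for the other thread's name. In the
free-threaded build the matches are now collected into a stack-local
array on every call instead (the pattern used in the diff below):

    pytype_slotdef *ptrs[MAX_EQUIV];
    pytype_slotdef **pp = ptrs;
    /* Collect all slotdefs that match name; only function-local state
       is written, so concurrent calls cannot interfere. */
    for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) {
        if (p->name_strobj == name) {
            *pp++ = p;
        }
    }
    *pp = NULL;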
--- Include/internal/pycore_interp_structs.h | 3 +++ Objects/typeobject.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index bb2967c39b878f..246186f74bd853 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -670,8 +670,11 @@ struct _Py_interp_cached_objects { /* object.__reduce__ */ PyObject *objreduce; +#ifndef Py_GIL_DISABLED + /* resolve_slotdups() */ PyObject *type_slots_pname; pytype_slotdef *type_slots_ptrs[MAX_EQUIV]; +#endif /* TypeVar and related types */ PyTypeObject *generic_type; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 61f97e99849d19..b94a811262eb9c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -11107,12 +11107,21 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) { /* XXX Maybe this could be optimized more -- but is it worth it? */ +#ifdef Py_GIL_DISABLED + pytype_slotdef *ptrs[MAX_EQUIV]; + pytype_slotdef **pp = ptrs; + /* Collect all slotdefs that match name into ptrs. */ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) + *pp++ = p; + } + *pp = NULL; +#else /* pname and ptrs act as a little cache */ PyInterpreterState *interp = _PyInterpreterState_GET(); #define pname _Py_INTERP_CACHED_OBJECT(interp, type_slots_pname) #define ptrs _Py_INTERP_CACHED_OBJECT(interp, type_slots_ptrs) pytype_slotdef *p, **pp; - void **res, **ptr; if (pname != name) { /* Collect all slotdefs that match name into ptrs. */ @@ -11124,10 +11133,12 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) } *pp = NULL; } +#endif /* Look in all slots of the type matching the name. If exactly one of these has a filled-in slot, return a pointer to that slot. Otherwise, return NULL. */ + void **res, **ptr; res = NULL; for (pp = ptrs; *pp; pp++) { ptr = slotptr(type, (*pp)->offset); @@ -11137,9 +11148,11 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) return NULL; res = ptr; } - return res; +#ifndef Py_GIL_DISABLED #undef pname #undef ptrs +#endif + return res; } #ifdef Py_GIL_DISABLED From 2efac2655b371511e146ae6465666c920dc6ac50 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Apr 2025 12:10:18 -0700 Subject: [PATCH 21/45] Fix comment. --- Objects/typeobject.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b94a811262eb9c..b5cd4b85225464 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3478,11 +3478,9 @@ mro_invoke(PyTypeObject *type) const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { - // Custom mro() method on metaclass. This is potentially - // re-entrant. We are called either from type_ready(), in which case - // the TYPE_LOCK mutex is held, or from type_set_bases() (assignment to - // __bases__), in which case we need to stop-the-world in order to avoid - // data races. + // Custom mro() method on metaclass. This is potentially re-entrant. + // We are called either from type_ready() or from type_set_bases() + // (assignment to __bases__). bool stopped = types_world_is_stopped(); if (stopped) { // Called for type_set_bases(), re-assigment of __bases__ From f7d2d36599e277b144881072f162e9b15800c31f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Apr 2025 19:55:25 -0700 Subject: [PATCH 22/45] Make the init of tp_dict thread-safe. It is unlikely that tp_dict is not allocated when type_setattro() is called. 
However, if it is NULL, stop-the-world so we can safely assign it.
---
 Objects/typeobject.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index b5cd4b85225464..30155f22a45c52 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -6274,16 +6274,14 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
 
     PyObject *dict = type->tp_dict;
     if (dict == NULL) {
-        // We don't just do PyType_Ready because we could already be readying
-        dict = type->tp_dict;
-        if (dict == NULL) {
-            // FIXME: this can race with other threads. However, it is rare
-            // that PyType_Ready() is not called before the type is used from
-            // multiple threads. To do this safely, we have to ensure the
-            // type is not revealed to other threads or we stop-the-world before
-            // doing this assignment.
+        // This is an unlikely case. PyType_Ready has not yet been done and
+        // we need to initialize tp_dict. We don't just do PyType_Ready
+        // because we could already be readying.
+        types_stop_world();
+        if (type->tp_dict == NULL) {
             dict = type->tp_dict = PyDict_New();
         }
+        types_start_world();
         if (dict == NULL) {
             res = -1;
             goto done;

From f3fd35a552728ab35b5af9afe34d6199c565cbd8 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Tue, 8 Apr 2025 19:57:46 -0700
Subject: [PATCH 23/45] Do some additional locking simplification.

Re-structure type_setattro() into a slot and non-slot case. For the slot
case, we need to stop-the-world. For non-slot, we need to hold TYPE_LOCK.

Remove unneeded locking in type_mro_modified(). This is only called if
the type is new or if the world is stopped. In both cases, TYPE_LOCK
doesn't need to be held.
---
 Objects/typeobject.c | 81 ++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 51 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 30155f22a45c52..316c214b2946fa 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -144,9 +144,6 @@ types_mutex_set_owned(PyThread_ident_t tid)
 static void
 types_mutex_lock(void)
 {
-    if (types_world_is_stopped()) {
-        return;
-    }
     assert(!types_mutex_is_owned());
     PyMutex_Lock(TYPE_LOCK);
     types_mutex_set_owned(PyThread_get_thread_ident_ex());
@@ -155,15 +152,11 @@ types_mutex_lock(void)
 static void
 types_mutex_unlock(void)
 {
-    if (types_world_is_stopped()) {
-        return;
-    }
     types_mutex_set_owned(0);
     PyMutex_Unlock(TYPE_LOCK);
 }
 
-#define ASSERT_TYPE_LOCK_HELD() \
-    assert(types_world_is_stopped() || types_mutex_is_owned())
+#define ASSERT_TYPE_LOCK_HELD() assert(types_mutex_is_owned())
 
 #else
 
@@ -1094,7 +1087,6 @@ PyType_Unwatch(int watcher_id, PyObject* obj)
 static void
 set_version_unlocked(PyTypeObject *tp, unsigned int version)
 {
-    ASSERT_TYPE_LOCK_HELD();
     assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED));
 #ifndef Py_GIL_DISABLED
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -1142,7 +1134,6 @@ type_modified_unlocked(PyTypeObject *type)
 
        We don't assign new version tags eagerly, but only as
        needed.
     */
-    ASSERT_TYPE_LOCK_HELD();
     if (type->tp_version_tag == 0) {
         return;
     }
@@ -1233,6 +1224,7 @@ has_custom_mro(PyTypeObject *tp)
 static void
 type_mro_modified(PyTypeObject *type, PyObject *bases)
 {
+    ASSERT_NEW_OR_STOPPED(type);
     /* Check that all base classes or elements of the MRO of type are
        able to be cached.
This function is called after the base @@ -1250,7 +1242,6 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) #endif bool is_custom = !Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type); - types_mutex_lock(); if (is_custom) { goto clear; } @@ -1274,7 +1265,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) goto clear; } } - goto done; + return; clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); @@ -1286,9 +1277,6 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) FT_ATOMIC_STORE_PTR_RELAXED( ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); } - -done: - types_mutex_unlock(); } /* @@ -3582,9 +3570,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { - types_mutex_lock(); type_modified_unlocked(type); - types_mutex_unlock(); } else { /* For static builtin types, this is only called during init @@ -3693,9 +3679,7 @@ static int update_slot(PyTypeObject *, PyObject *); static void fixup_slot_dispatchers(PyTypeObject *); static int type_new_set_names(PyTypeObject *); static int type_new_init_subclass(PyTypeObject *, PyObject *); -#ifdef Py_GIL_DISABLED static bool has_slotdef(PyObject *); -#endif /* * Helpers for __dict__ descriptor. We don't want to expose the dicts @@ -6167,13 +6151,11 @@ _Py_type_getattro(PyObject *tp, PyObject *name) } // Called by type_setattro(). Updates both the type dict and -// any type slots that correspond to the modified entry. +// the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) { - ASSERT_TYPE_LOCK_HELD(); - // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor // of the current value as it can observe the cache in an inconsistent @@ -6195,28 +6177,9 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%U'", ((PyTypeObject*)type)->tp_name, name); - types_mutex_unlock(); - // Note: reentrancy possible - _PyObject_SetAttributeErrorContext((PyObject *)type, name); - types_mutex_lock(); - return -1; + return -2; } -#ifdef Py_GIL_DISABLED - if (is_dunder_name(name) && has_slotdef(name)) { - // update is potentially changing one or more slots - int ret; - types_stop_world(); - ret = update_slot(type, name); - types_start_world(); - return ret; - } -#else - if (is_dunder_name(name)) { - return update_slot(type, name); - } -#endif - return 0; } @@ -6288,13 +6251,31 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) } } - // FIXME: I'm not totaly sure this is safe from a deadlock. If so, how to avoid? - types_mutex_lock(); - Py_BEGIN_CRITICAL_SECTION(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); - assert(_PyType_CheckConsistency(type)); - Py_END_CRITICAL_SECTION(); - types_mutex_unlock(); + if (is_dunder_name(name) && has_slotdef(name)) { + // The name corresponds to a type slot. + types_stop_world(); + Py_BEGIN_CRITICAL_SECTION(dict); + res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + if (res == 0) { + res = update_slot(type, name); + } + Py_END_CRITICAL_SECTION(); + types_start_world(); + } else { + // The name is not a slot, hold TYPE_LOCK while updating version tags. 
+ types_mutex_lock(); + Py_BEGIN_CRITICAL_SECTION(dict); + res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + Py_END_CRITICAL_SECTION(); + types_mutex_unlock(); + } + if (res == -2) { + // We handle the attribute error case here because reentrancy is + // possible when calling this function. + _PyObject_SetAttributeErrorContext((PyObject *)type, name); + res = -1; + } + assert(res < 0 || _PyType_CheckConsistency(type)); done: Py_DECREF(name); @@ -11151,7 +11132,6 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) return res; } -#ifdef Py_GIL_DISABLED // Return true if "name" corresponds to at least one slot definition. This is // a more accurate but more expensive test compared to is_dunder_name(). static bool @@ -11164,7 +11144,6 @@ has_slotdef(PyObject *name) } return false; } -#endif /* Common code for update_slots_callback() and fixup_slot_dispatchers(). * From 57c2a44171a7c03006234f8f32c8fd6a02336af7 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 10 Apr 2025 16:19:18 -0700 Subject: [PATCH 24/45] Avoid acquiring the types mutex if version is set. This can potentially help quite a bit when the type version tag has already been assigned but the MCACHE is missed. We can complete the lookup without acquiring the types mutex. --- Objects/typeobject.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 316c214b2946fa..132db65625d84a 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5800,16 +5800,18 @@ _PyTypes_AfterFork(void) static unsigned int type_assign_version(PyTypeObject *type) { - unsigned int version; - types_mutex_lock(); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (assign_version_tag(interp, type)) { - version = type->tp_version_tag; - } - else { - version = 0; + unsigned int version = FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag); + if (version == 0) { + types_mutex_lock(); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (assign_version_tag(interp, type)) { + version = type->tp_version_tag; + } + else { + version = 0; + } + types_mutex_unlock(); } - types_mutex_unlock(); return version; } From 7c0ccf548cba35ef44a4a1dbc1e5015ccc5f3264 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 20:24:12 -0700 Subject: [PATCH 25/45] Add check_invalid_reentrancy() call. --- Python/ceval.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 363f263ad2a083..4d493466169baf 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -138,6 +138,20 @@ #endif +static void +check_invalid_reentrancy(void) +{ +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) + // In the free-threaded build, the interpreter must not be re-entered if + // either the world-is-stopped or if the types mutex is held. If so, + // that's a bug somewhere (likely in the painfully complex typeobject code).
+ PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!interp->stoptheworld.world_stopped); + assert(interp->types.mutex_tid != PyThread_get_thread_ident_ex()); +#endif +} + + #ifdef Py_DEBUG static void dump_item(_PyStackRef item) @@ -959,6 +973,7 @@ PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { _Py_EnsureTstateNotNULL(tstate); + check_invalid_reentrancy(); CALL_STAT_INC(pyeval_calls); #if USE_COMPUTED_GOTOS && !Py_TAIL_CALL_INTERP From 4fa77bb11c40c3c35f47a4f371baed5642dce498 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 20:25:16 -0700 Subject: [PATCH 26/45] Fix additional re-entrancy issues found. - check_set_special_type_attr() is potentially re-entrant. Fix by refactoring type_set_bases_world_stopped() into two functions, one to check the args and one to actually set tp_base. - the type watcher stuff is re-entrant. Fix by unlocking the mutex before handling those. - add `is_new_type` parameter to mro_internal() and friends. This explicitly tells us if we are initializing a new type or modifying the bases of an existing one. The stop-the-world mechanism is used in only the second case. - rename best_base() to find_best_base() --- Objects/typeobject.c | 92 ++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 132db65625d84a..0088704767d555 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1158,6 +1158,9 @@ type_modified_unlocked(PyTypeObject *type) // Notify registered type watchers, if any if (type->tp_watched) { + // We must unlock here because at least the PyErr_FormatUnraisable + // call is re-entrant and the watcher callback might be too. 
+ types_mutex_unlock(); PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; @@ -1174,6 +1177,7 @@ type_modified_unlocked(PyTypeObject *type) i++; bits >>= 1; } + types_mutex_lock(); } set_version_unlocked(type, 0); /* 0 is not a valid version tag */ @@ -1694,8 +1698,8 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) return mro; } -static PyTypeObject *best_base(PyObject *); -static int mro_internal(PyTypeObject *, int, PyObject **); +static PyTypeObject *find_best_base(PyObject *); +static int mro_internal(PyTypeObject *, int, PyObject **, bool); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); @@ -1711,12 +1715,17 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); static int -mro_hierarchy(PyTypeObject *type, PyObject *temp) +mro_hierarchy(PyTypeObject *type, PyObject *temp, bool is_new_type) { - ASSERT_NEW_OR_STOPPED(type); - +#ifdef Py_DEBUG + if (is_new_type) { + assert(!TYPE_IS_REVEALED(type)); + } else { + assert(types_world_is_stopped()); + } +#endif PyObject *old_mro; - int res = mro_internal(type, 0, &old_mro); + int res = mro_internal(type, 0, &old_mro, is_new_type); if (res <= 0) { /* error / reentrance */ return res; @@ -1766,7 +1775,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp); + res = mro_hierarchy(subclass, temp, is_new_type); if (res < 0) { break; } @@ -1778,14 +1787,13 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) +type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **best_base) { - // Check arguments + // Check arguments, this is re-entrant due to the PySys_Audit() call if (!check_set_special_type_attr(type, new_bases, "__bases__")) { return -1; } assert(new_bases != NULL); - assert(types_world_is_stopped()); if (!PyTuple_Check(new_bases)) { PyErr_Format(PyExc_TypeError, @@ -1830,26 +1838,34 @@ } // Compute the new MRO and the new base class - PyTypeObject *new_base = best_base(new_bases); - if (new_base == NULL) + *best_base = find_best_base(new_bases); + if (*best_base == NULL) return -1; - if (!compatible_for_assignment(type->tp_base, new_base, "__bases__")) { + if (!compatible_for_assignment(type->tp_base, *best_base, "__bases__")) { return -1; } + return 0; +} + +static int +type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) +{ + assert(types_world_is_stopped()); + PyObject *old_bases = lookup_tp_bases(type); assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; set_tp_bases(type, Py_NewRef(new_bases), 0); - type->tp_base = (PyTypeObject *)Py_NewRef(new_base); + type->tp_base = (PyTypeObject *)Py_NewRef(best_base); PyObject *temp = PyList_New(0); if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp) < 0) { + if (mro_hierarchy(type, temp, false) < 0) { goto undo; } Py_DECREF(temp); @@ -1881,7 +1897,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) return res; undo: - n = PyList_GET_SIZE(temp); + Py_ssize_t n = PyList_GET_SIZE(temp);
for (Py_ssize_t i = n - 1; i >= 0; i--) { PyTypeObject *cls; PyObject *new_mro, *old_mro = NULL; @@ -1898,13 +1914,13 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) bail: if (lookup_tp_bases(type) == new_bases) { - assert(type->tp_base == new_base); + assert(type->tp_base == best_base); set_tp_bases(type, old_bases, 0); type->tp_base = old_base; Py_DECREF(new_bases); - Py_DECREF(new_base); + Py_DECREF(best_base); } else { Py_DECREF(old_bases); @@ -1919,9 +1935,13 @@ static int type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); + PyTypeObject *best_base; + if (type_check_new_bases(type, new_bases, &best_base) < 0) { + return -1; + } int res; types_stop_world(); - res = type_set_bases_world_stopped(type, new_bases); + res = type_set_bases_world_stopped(type, new_bases, best_base); types_start_world(); return res; } @@ -3458,7 +3478,7 @@ mro_check(PyTypeObject *type, PyObject *mro) - through a finalizer of the return value of mro(). */ static PyObject * -mro_invoke(PyTypeObject *type) +mro_invoke(PyTypeObject *type, bool is_new_type) { PyObject *mro_result; PyObject *new_mro; @@ -3467,10 +3487,8 @@ mro_invoke(PyTypeObject *type) if (custom) { // Custom mro() method on metaclass. This is potentially re-entrant. - // We are called either from type_ready() or from type_set_bases() - // (assignment to __bases__). - bool stopped = types_world_is_stopped(); - if (stopped) { + // We are called either from type_ready() or from type_set_bases(). + if (!is_new_type) { // Called for type_set_bases(), re-assigment of __bases__ types_start_world(); } @@ -3482,7 +3500,7 @@ mro_invoke(PyTypeObject *type) else { new_mro = NULL; } - if (stopped) { + if (!is_new_type) { types_stop_world(); } } @@ -3538,9 +3556,15 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_type) { - ASSERT_NEW_OR_STOPPED(type); +#ifdef Py_DEBUG + if (is_new_type) { + assert(!TYPE_IS_REVEALED(type)); + } else { + assert(types_world_is_stopped()); + } +#endif PyObject *new_mro, *old_mro; int reent; @@ -3549,7 +3573,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) Don't let old_mro be GC'ed and its address be reused for another object, like (suddenly!) a new tp_mro. */ old_mro = Py_XNewRef(lookup_tp_mro(type)); - new_mro = mro_invoke(type); /* might cause reentrance */ + new_mro = mro_invoke(type, is_new_type); /* might cause reentrance */ reent = (lookup_tp_mro(type) != old_mro); Py_XDECREF(old_mro); if (new_mro == NULL) { @@ -3590,7 +3614,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) This is the first one that's on the path to the "solid base". 
*/ static PyTypeObject * -best_base(PyObject *bases) +find_best_base(PyObject *bases) { Py_ssize_t i, n; PyTypeObject *base, *winner, *candidate; @@ -4725,7 +4749,7 @@ type_new_get_bases(type_new_ctx *ctx, PyObject **type) } /* Calculate best base, and check that all bases are type objects */ - PyTypeObject *base = best_base(ctx->bases); + PyTypeObject *base = find_best_base(ctx->bases); if (base == NULL) { return -1; } @@ -5140,12 +5164,12 @@ PyType_FromMetaclass( } /* Calculate best base, and check that all bases are type objects */ - PyTypeObject *base = best_base(bases); // borrowed ref + PyTypeObject *base = find_best_base(bases); // borrowed ref if (base == NULL) { goto finally; } - // best_base should check Py_TPFLAGS_BASETYPE & raise a proper exception, - // here we just check its work + // find_best_base() should check Py_TPFLAGS_BASETYPE & raise a proper + // exception, here we just check its work assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE)); /* Calculate sizes */ @@ -8617,7 +8641,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal(type, initial, NULL) < 0) { + if (mro_internal(type, initial, NULL, true) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); From caf6554415ef00e3f819b49752b50d6d2f5835a8 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 20:37:30 -0700 Subject: [PATCH 27/45] Add comment explaining class_name() code. --- Objects/typeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0088704767d555..93f3d39c5c7faa 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3159,7 +3159,8 @@ static PyObject * class_name(PyObject *cls) { #ifdef Py_GIL_DISABLED - // Avoid re-entrancy and use tp_name + // Avoid re-entrancy and use tp_name. This is only used for the + // mro_implementation() sanity check of the computed mro. return type_name(cls, NULL); #else PyObject *name; From 90ea541dceddfae060ac889ee5d06da68e1e4f1f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 22:39:58 -0700 Subject: [PATCH 28/45] Use atomic load to avoid thread safety issue. --- Python/ceval.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 1a1ca5bfb50a45..e57ca0a71fcc89 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -147,7 +147,8 @@ check_invalid_reentrancy(void) // that's a bug somewhere (likely in the painfully complex typeobject code). PyInterpreterState *interp = _PyInterpreterState_GET(); assert(!interp->stoptheworld.world_stopped); - assert(interp->types.mutex_tid != PyThread_get_thread_ident_ex()); + assert(_Py_atomic_load_ullong_relaxed(&interp->types.mutex_tid) != + PyThread_get_thread_ident_ex()); #endif } From 0dc0faf4c71a523a4fcdb962231766ff48afa881 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 22:43:59 -0700 Subject: [PATCH 29/45] Fix default debug build. 
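The Py_DEBUG-only asserts added in the previous commits reference TYPE_IS_REVEALED() directly (for example in mro_hierarchy()), so a default (non-free-threaded) debug build no longer compiled. A minimal sketch of the macro layering this commit completes; the free-threaded definition is elided and the _Py_TYPE_REVEALED_FLAG form shown for it is an assumption based on the comments in this file:

    #ifdef Py_GIL_DISABLED
    // Assumed debug-build form: true once the type has been exposed to
    // other threads (the flag is set by type_new()).
    #define TYPE_IS_REVEALED(tp) ((tp)->tp_flags & _Py_TYPE_REVEALED_FLAG)
    #else
    // Added here: the check collapses to a constant, so uses such as
    // assert(!TYPE_IS_REVEALED(type)) still compile in default builds.
    #define TYPE_IS_REVEALED(tp) 0
    #endif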
--- Objects/typeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7f5fbd1b77df69..79d394f9d4b30e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -167,6 +167,7 @@ types_mutex_unlock(void) #define types_start_world() #define ASSERT_TYPE_LOCK_HELD() #define ASSERT_NEW_OR_STOPPED(tp) +#define TYPE_IS_REVEALED(tp) 0 #endif From 956e5d1bb447f64bd54f05183d9fa59226e13354 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 23:14:28 -0700 Subject: [PATCH 30/45] Move declaration to avoid syntax error. --- Objects/typeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 79d394f9d4b30e..1cb989c698c5b1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1856,6 +1856,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje { assert(types_world_is_stopped()); + Py_ssize_t n; PyObject *old_bases = lookup_tp_bases(type); assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; @@ -1899,7 +1900,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje return res; undo: - Py_ssize_t n = PyList_GET_SIZE(temp); + n = PyList_GET_SIZE(temp); for (Py_ssize_t i = n - 1; i >= 0; i--) { PyTypeObject *cls; PyObject *new_mro, *old_mro = NULL; From 2bb710c6d7930cc603062a1f40fc568eb3a95a6f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 11:10:22 -0700 Subject: [PATCH 31/45] Remove _PyType_GetVersionForCurrentState, unused. --- Include/internal/pycore_typeobject.h | 1 - Objects/typeobject.c | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 1a4f89fd2449a0..0ee7d555c56cdd 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -134,7 +134,6 @@ extern int _PyType_AddMethod(PyTypeObject *, PyMethodDef *); extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags); -extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); PyTypeObject *_PyType_LookupByVersion(unsigned int version); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1cb989c698c5b1..b1830ae87d1d12 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1319,18 +1319,6 @@ _PyType_LookupByVersion(unsigned int version) #endif } -unsigned int -_PyType_GetVersionForCurrentState(PyTypeObject *tp) -{ -#ifdef Py_GIL_DISABLED - return _Py_atomic_load_uint32_acquire(&tp->tp_version_tag); -#else - return tp->tp_version_tag; -#endif -} - - - #define MAX_VERSIONS_PER_CLASS 1000 #if _Py_ATTR_CACHE_UNUSED < MAX_VERSIONS_PER_CLASS #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" From 3a9bc96bc5682ff773bb2e6bd88db1fbb48e9775 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:25:47 -0700 Subject: [PATCH 32/45] Fix for possible re-entrancy in has_custom_mro(). 
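has_custom_mro() can call back into the interpreter, so it must not run while the world is stopped. A simplified sketch of the detection pattern used in the diff below (the real code only re-starts the world when an existing type is being updated, i.e. when is_new_type is false):

    // Record the metaclass version tag before the re-entrant call.
    unsigned int meta_version = Py_TYPE(type)->tp_version_tag;
    types_start_world();                    // let re-entrant code run
    bool is_custom = has_custom_mro(type);  // may execute Python code
    types_stop_world();                     // regain exclusive access
    if (meta_version != Py_TYPE(type)->tp_version_tag) {
        // The metaclass was mutated while the world was re-started;
        // treat the cached MRO as invalid.
        goto clear;
    }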
--- Objects/typeobject.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b1830ae87d1d12..19ffc5519b9c72 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1228,7 +1228,7 @@ has_custom_mro(PyTypeObject *tp) } static void -type_mro_modified(PyTypeObject *type, PyObject *bases) +type_mro_modified(PyTypeObject *type, PyObject *bases, bool is_new_type) { ASSERT_NEW_OR_STOPPED(type); /* @@ -1243,21 +1243,24 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. */ -#ifdef Py_GIL_DISABLED - unsigned int meta_version = FT_ATOMIC_LOAD_UINT32_RELAXED(Py_TYPE(type)->tp_version_tag); -#endif - bool is_custom = !Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type); - - if (is_custom) { - goto clear; - } -#ifdef Py_GIL_DISABLED - if (meta_version != Py_TYPE(type)->tp_version_tag) { - // Raced with another thread mutating the metaclass, possible that mro() - // method was assigned. - goto clear; + if (!Py_IS_TYPE(type, &PyType_Type)) { + unsigned int meta_version = Py_TYPE(type)->tp_version_tag; + if (!is_new_type) { + types_start_world(); + } + // This can be re-entrant. + bool is_custom = has_custom_mro(type); + if (!is_new_type) { + types_stop_world(); + } + if (meta_version != Py_TYPE(type)->tp_version_tag) { + // metaclass changed during call of has_custom_mro() + goto clear; + } + if (is_custom) { + goto clear; + } } -#endif Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(bases, i); @@ -3598,10 +3601,10 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_ set_tp_mro(type, new_mro, initial); - type_mro_modified(type, new_mro); + type_mro_modified(type, new_mro, is_new_type); /* corner case: the super class might have been hidden from the custom MRO */ - type_mro_modified(type, lookup_tp_bases(type)); + type_mro_modified(type, lookup_tp_bases(type), is_new_type); // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { From d742a53d6711deb0d140e29fb9a90c128ff9412b Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:27:27 -0700 Subject: [PATCH 33/45] Use correct FT_ATOMIC_ macro. 
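tp_version_tag is declared as unsigned int, so the relaxed load has to go through the wrapper for that exact type. A sketch of the distinction; this assumes the usual FT_ATOMIC_* convention where each wrapper is an atomic load under Py_GIL_DISABLED and a plain read otherwise:

    // Correct: matches the declared type of tp_version_tag.
    unsigned int v = FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag);

    // FT_ATOMIC_LOAD_UINT32_RELAXED() expects a uint32_t operand; even
    // where the widths agree, the wrapper should match the declaration
    // so that every build compiles cleanly.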
--- Objects/typeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 19ffc5519b9c72..191cafd0332b4f 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -48,7 +48,7 @@ class object "PyObject *" "&PyBaseObject_Type" & ((1 << MCACHE_SIZE_EXP) - 1)) #define MCACHE_HASH_METHOD(type, name) \ - MCACHE_HASH(FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag), \ + MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \ ((Py_ssize_t)(name)) >> 3) #define MCACHE_CACHEABLE_NAME(name) \ PyUnicode_CheckExact(name) && \ @@ -5838,7 +5838,7 @@ _PyTypes_AfterFork(void) static unsigned int type_assign_version(PyTypeObject *type) { - unsigned int version = FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag); + unsigned int version = FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag); if (version == 0) { types_mutex_lock(); PyInterpreterState *interp = _PyInterpreterState_GET(); From e7480c348b3b52fc21b9b56261eb1b1d70112213 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:28:13 -0700 Subject: [PATCH 34/45] Remove TSAN suppression for 'assign_version_tag'. --- Tools/tsan/suppressions_free_threading.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 7d6cd6c10f5218..404c30157362aa 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -12,7 +12,6 @@ # These warnings trigger directly in a CPython function. -race_top:assign_version_tag race_top:dump_traceback race_top:fatal_error race_top:_PyFrame_GetCode @@ -41,4 +40,4 @@ race:list_inplace_repeat_lock_held # PyObject_Realloc internally does memcpy which isn't atomic so can race # with non-locking reads. See #132070 -race:PyObject_Realloc \ No newline at end of file +race:PyObject_Realloc From e2ea2819a344982ff8abea479e0f7004b9099690 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:39:47 -0700 Subject: [PATCH 35/45] Small efficiency fix for types_mutex_set_owned(). --- Objects/typeobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 191cafd0332b4f..8adde94d5244f6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -132,7 +132,6 @@ types_mutex_is_owned(void) PyThread_ident_t tid = PyThread_get_thread_ident_ex(); return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; } -#endif // Set the TID of the thread currently holding TYPE_LOCK. static void @@ -140,19 +139,24 @@ types_mutex_set_owned(PyThread_ident_t tid) { _Py_atomic_store_ullong_relaxed(TYPE_LOCK_TID, tid); } +#endif static void types_mutex_lock(void) { assert(!types_mutex_is_owned()); PyMutex_Lock(TYPE_LOCK); - types_mutex_set_owned(PyThread_get_thread_ident_ex()); +#ifdef Py_DEBUG + types_mutex_set_owned(_Py_ThreadId()); +#endif } static void types_mutex_unlock(void) { +#ifdef Py_DEBUG types_mutex_set_owned(0); +#endif PyMutex_Unlock(TYPE_LOCK); } From 935bfcacdb6f1dd47804e930515cc9f346683fdd Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 14:53:22 -0700 Subject: [PATCH 36/45] Revert to using critical section with TYPE_LOCK. 
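Unlike a bare PyMutex held across helper calls, a critical section is suspended at attachment points (blocking operations, stop-the-world pauses) and re-acquired afterwards, which turns potential deadlocks into temporary releases. A minimal sketch of the restored pattern, using the macros defined in the diff below:

    // TYPE_LOCK is &PyInterpreterState_Get()->types.mutex.
    PyInterpreterState *interp = _PyInterpreterState_GET();
    BEGIN_TYPE_LOCK();                  // Py_BEGIN_CRITICAL_SECTION_MUT
    assign_version_tag(interp, type);   // version changes are serialized
    END_TYPE_LOCK();                    // Py_END_CRITICAL_SECTION

    // When both the type and its dict must be stable, the two-mutex form
    // takes TYPE_LOCK plus the dict's per-object mutex:
    BEGIN_TYPE_DICT_LOCK(dict);         // Py_BEGIN_CRITICAL_SECTION2_MUT
    // ... update the dict and the version tags ...
    END_TYPE_DICT_LOCK();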
--- Include/internal/pycore_interp_structs.h | 4 - Objects/typeobject.c | 123 ++++++++--------------- Python/ceval.c | 6 +- 3 files changed, 46 insertions(+), 87 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f816cdf0a40fcc..cd037d0bf9dc65 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -566,10 +566,6 @@ struct types_state { managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; } for_extensions; PyMutex mutex; -#ifdef Py_GIL_DISABLED - // used to check correct usage of the above mutex - unsigned long long mutex_tid; -#endif // Borrowed references to type objects whose // tp_version_tag % TYPE_VERSION_CACHE_SIZE diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 8adde94d5244f6..9232c11eee1a26 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -73,17 +73,14 @@ class object "PyObject *" "&PyBaseObject_Type" // while the stop-the-world mechanism is active. The slots and flags are read // in many places without holding a lock and without atomics. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex +#define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK) +#define END_TYPE_LOCK() Py_END_CRITICAL_SECTION() -// Since TYPE_LOCK is used as regular mutex, we must take special care about -// potential re-entrant code paths. We use TYPE_LOCK_TID in debug builds to -// ensure that we are not trying to re-acquire the mutex when it is already -// held by the current thread. There are a couple cases when we release the -// mutex, when we call functions that might re-enter. If we already hold the -// lock and try to acquire it again with the same thread, it is a bug in the -// code. We could just let it deadlock but having an assert failure gives -// a more explicit error. -#define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid +#define BEGIN_TYPE_DICT_LOCK(d) \ + Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) +#define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() +#ifdef Py_DEBUG // Return true if the world is currently stopped. static bool types_world_is_stopped(void) @@ -91,6 +88,7 @@ types_world_is_stopped(void) PyInterpreterState *interp = _PyInterpreterState_GET(); return interp->stoptheworld.world_stopped; } +#endif // Checks that the type has not yet been revealed (exposed) to other // threads. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and @@ -102,6 +100,9 @@ types_world_is_stopped(void) #define TYPE_IS_REVEALED(tp) 0 #endif +#define ASSERT_TYPE_LOCK_HELD() \ + _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + // Checks if we can safely update type slots or tp_flags. #define ASSERT_NEW_OR_STOPPED(tp) \ assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) @@ -124,54 +125,18 @@ types_start_world(void) assert(!types_world_is_stopped()); } -#ifdef Py_DEBUG -// Return true if the TYPE_LOCK mutex is held by the current thread. -static bool -types_mutex_is_owned(void) -{ - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; -} - -// Set the TID of the thread currently holding TYPE_LOCK. 
-static void -types_mutex_set_owned(PyThread_ident_t tid) -{ - _Py_atomic_store_ullong_relaxed(TYPE_LOCK_TID, tid); -} -#endif - -static void -types_mutex_lock(void) -{ - assert(!types_mutex_is_owned()); - PyMutex_Lock(TYPE_LOCK); -#ifdef Py_DEBUG - types_mutex_set_owned(_Py_ThreadId()); -#endif -} - -static void -types_mutex_unlock(void) -{ -#ifdef Py_DEBUG - types_mutex_set_owned(0); -#endif - PyMutex_Unlock(TYPE_LOCK); -} - -#define ASSERT_TYPE_LOCK_HELD() assert(types_mutex_is_owned()) - #else -#define types_mutex_lock() -#define types_mutex_unlock() +#define BEGIN_TYPE_LOCK() +#define END_TYPE_LOCK() +#define BEGIN_TYPE_DICT_LOCK(d) +#define END_TYPE_DICT_LOCK() +#define ASSERT_TYPE_LOCK_HELD() +#define TYPE_IS_REVEALED(tp) 0 +#define ASSERT_NEW_OR_STOPPED(tp) #define types_world_is_stopped() 1 #define types_stop_world() #define types_start_world() -#define ASSERT_TYPE_LOCK_HELD() -#define ASSERT_NEW_OR_STOPPED(tp) -#define TYPE_IS_REVEALED(tp) 0 #endif @@ -1067,10 +1032,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification - types_mutex_lock(); + BEGIN_TYPE_LOCK(); assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); - types_mutex_unlock(); + END_TYPE_LOCK(); return 0; } @@ -1164,9 +1129,8 @@ type_modified_unlocked(PyTypeObject *type) // Notify registered type watchers, if any if (type->tp_watched) { - // We must unlock here because at least the PyErr_FormatUnraisable - // call is re-entrant and the watcher callback might be too. - types_mutex_unlock(); + // Note that PyErr_FormatUnraisable is re-entrant and the watcher + // callback might be too. PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; @@ -1183,7 +1147,6 @@ type_modified_unlocked(PyTypeObject *type) i++; bits >>= 1; } - types_mutex_lock(); } set_version_unlocked(type, 0); /* 0 is not a valid version tag */ @@ -1203,9 +1166,9 @@ PyType_Modified(PyTypeObject *type) return; } - types_mutex_lock(); + BEGIN_TYPE_LOCK(); type_modified_unlocked(type); - types_mutex_unlock(); + END_TYPE_LOCK(); } static int @@ -1304,9 +1267,9 @@ This is similar to func_version_cache. 
void _PyType_SetVersion(PyTypeObject *tp, unsigned int version) { - types_mutex_lock(); + BEGIN_TYPE_LOCK(); set_version_unlocked(tp, version); - types_mutex_unlock(); + END_TYPE_LOCK(); } PyTypeObject * @@ -1384,9 +1347,9 @@ int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { PyInterpreterState *interp = _PyInterpreterState_GET(); int assigned; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); assigned = assign_version_tag(interp, type); - types_mutex_unlock(); + END_TYPE_LOCK(); return assigned; } @@ -5844,7 +5807,7 @@ type_assign_version(PyTypeObject *type) { unsigned int version = FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag); if (version == 0) { - types_mutex_lock(); + BEGIN_TYPE_LOCK(); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { version = type->tp_version_tag; @@ -5852,7 +5815,7 @@ type_assign_version(PyTypeObject *type) else { version = 0; } - types_mutex_unlock(); + END_TYPE_LOCK(); } return version; } @@ -5985,7 +5948,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return 0; } int can_cache; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version; #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(init); @@ -5993,7 +5956,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, if (can_cache) { FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init); } - types_mutex_unlock(); + END_TYPE_LOCK(); return can_cache; } @@ -6004,7 +5967,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor return 0; } int can_cache; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version; // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): @@ -6018,7 +5981,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); } - types_mutex_unlock(); + END_TYPE_LOCK(); return can_cache; } @@ -6038,7 +6001,7 @@ int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version) { int err; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); err = validate(ty); if (!err) { if(assign_version_tag(_PyInterpreterState_GET(), ty)) { @@ -6048,7 +6011,7 @@ _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_ err = -1; } } - types_mutex_unlock(); + END_TYPE_LOCK(); return err; } @@ -6298,6 +6261,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) if (is_dunder_name(name) && has_slotdef(name)) { // The name corresponds to a type slot. types_stop_world(); + // Since the world is stopped, we actually don't need this critical + // section. However, this internally calls dict methods that assert + // that the dict object mutex is held. So, we acquire it here to + // avoid those assert failing. Py_BEGIN_CRITICAL_SECTION(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); if (res == 0) { @@ -6306,12 +6273,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) Py_END_CRITICAL_SECTION(); types_start_world(); } else { - // The name is not a slot, hold TYPE_LOCK while updating version tags. - types_mutex_lock(); - Py_BEGIN_CRITICAL_SECTION(dict); + // The name is not a slot. 
+ BEGIN_TYPE_DICT_LOCK(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); - Py_END_CRITICAL_SECTION(); - types_mutex_unlock(); + END_TYPE_DICT_LOCK(); } if (res == -2) { // We handle the attribute error case here because reentrancy possible @@ -6415,10 +6380,10 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, clear_static_type_objects(interp, type, isbuiltin, final); if (final) { - types_mutex_lock(); + BEGIN_TYPE_LOCK(); type_clear_flags(type, Py_TPFLAGS_READY); set_version_unlocked(type, 0); - types_mutex_unlock(); + END_TYPE_LOCK(); } _PyStaticType_ClearWeakRefs(interp, type); diff --git a/Python/ceval.c b/Python/ceval.c index e57ca0a71fcc89..68991f72395e3b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -143,12 +143,10 @@ check_invalid_reentrancy(void) { #if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) // In the free-threaded build, the interpreter must not be re-entered if - // either the world-is-stopped or if the types mutex is held. If so, - // that's a bug somewhere (likely in the painfully complex typeobject code). + // the world-is-stopped. If so, that's a bug somewhere (quite likely in + // the painfully complex typeobject code). PyInterpreterState *interp = _PyInterpreterState_GET(); assert(!interp->stoptheworld.world_stopped); - assert(_Py_atomic_load_ullong_relaxed(&interp->types.mutex_tid) != - PyThread_get_thread_ident_ex()); #endif } From a81e9e395335c1cdf9e9ec41dcb1808d92836fba Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 16:45:10 -0700 Subject: [PATCH 37/45] Invalidate type cache before calling watchers. This avoids the watcher callback potentially using invalid cache entries. --- Objects/typeobject.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1f5269ddcbb8e3..366705d883d5c1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1127,6 +1127,14 @@ type_modified_unlocked(PyTypeObject *type) } } + set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + // This field *must* be invalidated if the type is modified (see the + // comment on struct _specialization_cache): + FT_ATOMIC_STORE_PTR_RELAXED( + ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); + } + // Notify registered type watchers, if any if (type->tp_watched) { // Note that PyErr_FormatUnraisable is re-entrant and the watcher @@ -1148,14 +1156,6 @@ type_modified_unlocked(PyTypeObject *type) bits >>= 1; } } - - set_version_unlocked(type, 0); /* 0 is not a valid version tag */ - if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - // This field *must* be invalidated if the type is modified (see the - // comment on struct _specialization_cache): - FT_ATOMIC_STORE_PTR_RELAXED( - ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); - } } void From f5df0c3a05afdff894d431b47c2d03c49f6cac94 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 21:09:04 -0700 Subject: [PATCH 38/45] Fixes for type_modified_unlocked(). * Since type_modified_unlocked() can be called with the world stopped then we need to re-start if making re-entrant calls (like calling type watcher callbacks). * Re-order code before type_modified_unlocked(). This fixes a potential bug if called functions end up re-assigning the type version tag. If they do, the cache can contain invalid entries. We should do the dictionary update first then set the version to zero. 
* Replace 'is_new_type' with 'world_stopped' boolean parameter. This seems a bit more clear. --- Objects/typeobject.c | 91 ++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 37 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 366705d883d5c1..c785817c31c131 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1088,7 +1088,7 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version) } static void -type_modified_unlocked(PyTypeObject *type) +type_modified_unlocked(PyTypeObject *type, bool world_stopped) { /* Invalidate any cached data for the specified type and all subclasses. This function is called after the base @@ -1105,6 +1105,15 @@ type_modified_unlocked(PyTypeObject *type) We don't assign new version tags eagerly, but only as needed. */ +#ifdef Py_DEBUG + if (world_stopped) { + assert(types_world_is_stopped()); + } else { + if (TYPE_IS_REVEALED(type)) { + ASSERT_TYPE_LOCK_HELD(); + } + } +#endif if (type->tp_version_tag == 0) { return; } @@ -1122,7 +1131,7 @@ type_modified_unlocked(PyTypeObject *type) if (subclass == NULL) { continue; } - type_modified_unlocked(subclass); + type_modified_unlocked(subclass, world_stopped); Py_DECREF(subclass); } } @@ -1139,6 +1148,9 @@ type_modified_unlocked(PyTypeObject *type) if (type->tp_watched) { // Note that PyErr_FormatUnraisable is re-entrant and the watcher // callback might be too. + if (world_stopped) { + types_start_world(); + } PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; @@ -1155,6 +1167,9 @@ type_modified_unlocked(PyTypeObject *type) i++; bits >>= 1; } + if (world_stopped) { + types_stop_world(); + } } } @@ -1167,7 +1182,7 @@ PyType_Modified(PyTypeObject *type) } BEGIN_TYPE_LOCK(); - type_modified_unlocked(type); + type_modified_unlocked(type, false); END_TYPE_LOCK(); } @@ -1195,7 +1210,7 @@ has_custom_mro(PyTypeObject *tp) } static void -type_mro_modified(PyTypeObject *type, PyObject *bases, bool is_new_type) +type_mro_modified(PyTypeObject *type, PyObject *bases, bool world_stopped) { ASSERT_NEW_OR_STOPPED(type); /* @@ -1212,12 +1227,12 @@ type_mro_modified(PyTypeObject *type, PyObject *bases, bool is_new_type) */ if (!Py_IS_TYPE(type, &PyType_Type)) { unsigned int meta_version = Py_TYPE(type)->tp_version_tag; - if (!is_new_type) { + if (world_stopped) { types_start_world(); } // This can be re-entrant. 
bool is_custom = has_custom_mro(type); - if (!is_new_type) { + if (world_stopped) { types_stop_world(); } if (meta_version != Py_TYPE(type)->tp_version_tag) { @@ -1621,7 +1636,7 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) } types_stop_world(); - type_modified_unlocked(type); + type_modified_unlocked(type, true); if (abstract) type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT); else @@ -1675,17 +1690,17 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); static int -mro_hierarchy(PyTypeObject *type, PyObject *temp, bool is_new_type) +mro_hierarchy(PyTypeObject *type, PyObject *temp, bool world_stopped) { #ifdef Py_DEBUG - if (is_new_type) { + if (!world_stopped) { assert(!TYPE_IS_REVEALED(type)); } else { assert(types_world_is_stopped()); } #endif PyObject *old_mro; - int res = mro_internal(type, 0, &old_mro, is_new_type); + int res = mro_internal(type, 0, &old_mro, world_stopped); if (res <= 0) { /* error / reentrance */ return res; @@ -1735,7 +1750,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp, bool is_new_type) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp, is_new_type); + res = mro_hierarchy(subclass, temp, world_stopped); if (res < 0) { break; } @@ -1826,7 +1841,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp, false) < 0) { + if (mro_hierarchy(type, temp, true) < 0) { goto undo; } Py_DECREF(temp); @@ -3464,7 +3479,7 @@ mro_check(PyTypeObject *type, PyObject *mro) - through a finalizer of the return value of mro(). */ static PyObject * -mro_invoke(PyTypeObject *type, bool is_new_type) +mro_invoke(PyTypeObject *type, bool world_stopped) { PyObject *mro_result; PyObject *new_mro; @@ -3474,7 +3489,7 @@ mro_invoke(PyTypeObject *type, bool is_new_type) if (custom) { // Custom mro() method on metaclass. This is potentially re-entrant. // We are called either from type_ready() or from type_set_bases(). - if (!is_new_type) { + if (world_stopped) { // Called for type_set_bases(), re-assigment of __bases__ types_start_world(); } @@ -3486,7 +3501,7 @@ mro_invoke(PyTypeObject *type, bool is_new_type) else { new_mro = NULL; } - if (!is_new_type) { + if (world_stopped) { types_stop_world(); } } @@ -3542,10 +3557,10 @@ mro_invoke(PyTypeObject *type, bool is_new_type) - Returns -1 in case of an error. */ static int -mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_type) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_stopped) { #ifdef Py_DEBUG - if (is_new_type) { + if (!world_stopped) { assert(!TYPE_IS_REVEALED(type)); } else { assert(types_world_is_stopped()); @@ -3559,7 +3574,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_ Don't let old_mro be GC'ed and its address be reused for another object, like (suddenly!) a new tp_mro. 
*/ old_mro = Py_XNewRef(lookup_tp_mro(type)); - new_mro = mro_invoke(type, is_new_type); /* might cause reentrance */ + new_mro = mro_invoke(type, world_stopped); /* might cause reentrance */ reent = (lookup_tp_mro(type) != old_mro); Py_XDECREF(old_mro); if (new_mro == NULL) { @@ -3573,14 +3588,14 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_ set_tp_mro(type, new_mro, initial); - type_mro_modified(type, new_mro, is_new_type); + type_mro_modified(type, new_mro, world_stopped); /* corner case: the super class might have been hidden from the custom MRO */ - type_mro_modified(type, lookup_tp_bases(type), is_new_type); + type_mro_modified(type, lookup_tp_bases(type), world_stopped); // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { - type_modified_unlocked(type); + type_modified_unlocked(type, world_stopped); } else { /* For static builtin types, this is only called during init @@ -6162,7 +6177,7 @@ _Py_type_getattro(PyObject *tp, PyObject *name) // the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, - PyObject *value, PyObject **old_value) + PyObject *value, PyObject **old_value, bool world_stopped) { // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor @@ -6174,13 +6189,6 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } - /* Clear the VALID_VERSION flag of 'type' and all its - subclasses. This could possibly be unified with the - update_subclasses() recursion in update_slot(), but carefully: - they each have their own conditions on which to stop - recursing into subclasses. */ - type_modified_unlocked(type); - if (_PyDict_SetItem_LockHeld(dict, name, value) < 0) { PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%U'", @@ -6188,6 +6196,13 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -2; } + /* Clear the VALID_VERSION flag of 'type' and all its + subclasses. This could possibly be unified with the + update_subclasses() recursion in update_slot(), but carefully: + they each have their own conditions on which to stop + recursing into subclasses. */ + type_modified_unlocked(type, world_stopped); + return 0; } @@ -6267,7 +6282,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) // that the dict object mutex is held. So, we acquire it here to // avoid those assert failing. Py_BEGIN_CRITICAL_SECTION(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + res = type_update_dict(type, (PyDictObject *)dict, name, value, + &old_value, true); if (res == 0) { res = update_slot(type, name); } @@ -6276,7 +6292,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) } else { // The name is not a slot. 
BEGIN_TYPE_DICT_LOCK(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + res = type_update_dict(type, (PyDictObject *)dict, name, value, + &old_value, false); END_TYPE_DICT_LOCK(); } if (res == -2) { @@ -8625,7 +8642,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal(type, initial, NULL, true) < 0) { + if (mro_internal(type, initial, NULL, false) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -11434,15 +11451,15 @@ update_all_slots(PyTypeObject* type) { pytype_slotdef *p; - ASSERT_NEW_OR_STOPPED(type); - - /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ - type_modified_unlocked(type); + assert(types_world_is_stopped()); for (p = slotdefs; p->name; p++) { /* update_slot returns int but can't actually fail */ update_slot(type, p->name_strobj); } + + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ + type_modified_unlocked(type, true); } @@ -11715,7 +11732,7 @@ PyType_Freeze(PyTypeObject *type) types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); - type_modified_unlocked(type); + type_modified_unlocked(type, true); types_start_world(); return 0; From 7db281c7e04c0ab4e722089910021e98b5c65a3e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 22 Apr 2025 16:18:36 -0700 Subject: [PATCH 39/45] Major re-work, TYPE_LOCK protects more things. Rather than trying to update tp_base, tp_bases, and tp_mro while the world is stopped, hold TYPE_LOCK when updating them. This is much more similar to how the base version of the code works and requires many fewer changes. Using stop-the-world turned out to be difficult to make work correctly, mostly due to custom mro() methods but also for some other reasons (for example, functions that potentially re-enter the interpreter). This re-work requires that we can call stop-the-world inside a critical section and have the mutex still held after re-starting. This seems to work and I've added asserts to confirm it. Stop-the-world is now only used when updating either tp_flags or other type slots and not the three type members listed above. --- Objects/typeobject.c | 338 ++++++++++++++++++++++--------------------- 1 file changed, 175 insertions(+), 163 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c785817c31c131..0eaff5009ff092 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -61,13 +61,14 @@ class object "PyObject *" "&PyBaseObject_Type" #ifdef Py_GIL_DISABLED // There's a global lock for types that ensures that tp_version_tag and -// _spec_cache are correctly updated if the type is modified. This avoids -// having to take additional locks while doing various subclass processing -// which may result in odd behaviors w.r.t. running with the GIL as the outer -// type lock could be released and reacquired during a subclass update if -// there's contention on the subclass lock. +// _spec_cache are correctly updated if the type is modified. It also protects +// tp_mro, tp_bases, and tp_base. This avoids having to take additional locks +// while doing various subclass processing which may result in odd behaviors +// w.r.t. running with the GIL as the outer type lock could be released and +// reacquired during a subclass update if there's contention on the subclass +// lock. // -// Note that this lock does not protect when updating type slots or the +// Note that this lock does not protect updates of other type slots or the // tp_flags member. 
Instead, we either ensure those updates are done before // the type has been revealed to other threads or we only do those updates // while the stop-the-world mechanism is active. The slots and flags are read @@ -78,6 +79,7 @@ class object "PyObject *" "&PyBaseObject_Type" #define BEGIN_TYPE_DICT_LOCK(d) \ Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) + #define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() #ifdef Py_DEBUG @@ -101,12 +103,15 @@ types_world_is_stopped(void) #endif #define ASSERT_TYPE_LOCK_HELD() \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); } // Checks if we can safely update type slots or tp_flags. #define ASSERT_NEW_OR_STOPPED(tp) \ assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) +#define ASSERT_NEW_OR_LOCKED(tp) \ + if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); } + static void types_stop_world(void) { @@ -134,6 +139,7 @@ types_start_world(void) #define ASSERT_TYPE_LOCK_HELD() #define TYPE_IS_REVEALED(tp) 0 #define ASSERT_NEW_OR_STOPPED(tp) +#define ASSERT_NEW_OR_LOCKED(tp) #define types_world_is_stopped() 1 #define types_stop_world() #define types_start_world() @@ -532,8 +538,10 @@ _PyType_GetBases(PyTypeObject *self) { PyObject *res; + BEGIN_TYPE_LOCK(); res = lookup_tp_bases(self); Py_INCREF(res); + END_TYPE_LOCK(); return res; } @@ -542,7 +550,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_Check(bases)); - ASSERT_NEW_OR_STOPPED(self); + ASSERT_NEW_OR_LOCKED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -587,21 +595,35 @@ clear_tp_bases(PyTypeObject *self, int final) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { + ASSERT_NEW_OR_LOCKED(self); return self->tp_mro; } PyObject * _PyType_GetMRO(PyTypeObject *self) { - // This is safe in the free-threaded build because tp_mro cannot be - // assigned unless the type is new or if world is stopped. +#ifdef Py_GIL_DISABLED + PyObject *mro = _Py_atomic_load_ptr_relaxed(&self->tp_mro); + if (mro == NULL) { + return NULL; + } + if (_Py_TryIncrefCompare(&self->tp_mro, mro)) { + return mro; + } + + BEGIN_TYPE_LOCK(); + mro = lookup_tp_mro(self); + Py_XINCREF(mro); + END_TYPE_LOCK(); + return mro; +#else return Py_XNewRef(lookup_tp_mro(self)); +#endif } static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { - ASSERT_NEW_OR_STOPPED(self); if (mro != NULL) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -1088,7 +1110,7 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version) } static void -type_modified_unlocked(PyTypeObject *type, bool world_stopped) +type_modified_unlocked(PyTypeObject *type) { /* Invalidate any cached data for the specified type and all subclasses. This function is called after the base @@ -1105,15 +1127,7 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) We don't assign new version tags eagerly, but only as needed. 
*/ -#ifdef Py_DEBUG - if (world_stopped) { - assert(types_world_is_stopped()); - } else { - if (TYPE_IS_REVEALED(type)) { - ASSERT_TYPE_LOCK_HELD(); - } - } -#endif + ASSERT_NEW_OR_LOCKED(type); if (type->tp_version_tag == 0) { return; } @@ -1131,7 +1145,7 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) if (subclass == NULL) { continue; } - type_modified_unlocked(subclass, world_stopped); + type_modified_unlocked(subclass); Py_DECREF(subclass); } } @@ -1146,17 +1160,14 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) // Notify registered type watchers, if any if (type->tp_watched) { - // Note that PyErr_FormatUnraisable is re-entrant and the watcher - // callback might be too. - if (world_stopped) { - types_start_world(); - } PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; while (bits) { assert(i < TYPE_MAX_WATCHERS); if (bits & 1) { + // Note that PyErr_FormatUnraisable is potentially re-entrant + // and the watcher callback might be too. PyType_WatchCallback cb = interp->type_watchers[i]; if (cb && (cb(type) < 0)) { PyErr_FormatUnraisable( @@ -1167,9 +1178,6 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) i++; bits >>= 1; } - if (world_stopped) { - types_stop_world(); - } } } @@ -1182,7 +1190,7 @@ PyType_Modified(PyTypeObject *type) } BEGIN_TYPE_LOCK(); - type_modified_unlocked(type, false); + type_modified_unlocked(type); END_TYPE_LOCK(); } @@ -1210,9 +1218,8 @@ has_custom_mro(PyTypeObject *tp) } static void -type_mro_modified(PyTypeObject *type, PyObject *bases, bool world_stopped) +type_mro_modified(PyTypeObject *type, PyObject *bases) { - ASSERT_NEW_OR_STOPPED(type); /* Check that all base classes or elements of the MRO of type are able to be cached. This function is called after the base @@ -1225,16 +1232,11 @@ type_mro_modified(PyTypeObject *type, PyObject *bases, bool world_stopped) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. */ + ASSERT_TYPE_LOCK_HELD(); if (!Py_IS_TYPE(type, &PyType_Type)) { unsigned int meta_version = Py_TYPE(type)->tp_version_tag; - if (world_stopped) { - types_start_world(); - } // This can be re-entrant. 
bool is_custom = has_custom_mro(type); - if (world_stopped) { - types_stop_world(); - } if (meta_version != Py_TYPE(type)->tp_version_tag) { // metaclass changed during call of has_custom_mro() goto clear; @@ -1635,13 +1637,16 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) return -1; } + BEGIN_TYPE_LOCK(); + type_modified_unlocked(type); types_stop_world(); - type_modified_unlocked(type, true); if (abstract) type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT); else type_set_flags_with_mask(type, Py_TPFLAGS_IS_ABSTRACT, 0); types_start_world(); + ASSERT_TYPE_LOCK_HELD(); + END_TYPE_LOCK(); return 0; } @@ -1663,6 +1668,7 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) PyTypeObject *type = PyTypeObject_CAST(tp); PyObject *mro; + BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(type); if (mro == NULL) { mro = Py_None; @@ -1670,11 +1676,12 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) Py_INCREF(mro); } + END_TYPE_LOCK(); return mro; } static PyTypeObject *find_best_base(PyObject *); -static int mro_internal(PyTypeObject *, int, PyObject **, bool); +static int mro_internal(PyTypeObject *, int, PyObject **); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); @@ -1689,18 +1696,15 @@ static int update_subclasses(PyTypeObject *type, PyObject *attr_name, static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); +// Compute tp_mro for this type and all of its subclasses. This +// is called after __bases__ is assigned to an existing type. static int -mro_hierarchy(PyTypeObject *type, PyObject *temp, bool world_stopped) +mro_hierarchy(PyTypeObject *type, PyObject *temp) { -#ifdef Py_DEBUG - if (!world_stopped) { - assert(!TYPE_IS_REVEALED(type)); - } else { - assert(types_world_is_stopped()); - } -#endif + ASSERT_TYPE_LOCK_HELD(); + PyObject *old_mro; - int res = mro_internal(type, 0, &old_mro, world_stopped); + int res = mro_internal(type, 0, &old_mro); if (res <= 0) { /* error / reentrance */ return res; @@ -1750,7 +1754,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp, bool world_stopped) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp, world_stopped); + res = mro_hierarchy(subclass, temp); if (res < 0) { break; } @@ -1825,9 +1829,9 @@ type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **bes } static int -type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) +type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) { - assert(types_world_is_stopped()); + ASSERT_TYPE_LOCK_HELD(); Py_ssize_t n; PyObject *old_bases = lookup_tp_bases(type); @@ -1841,7 +1845,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp, true) < 0) { + if (mro_hierarchy(type, temp) < 0) { goto undo; } Py_DECREF(temp); @@ -1859,7 +1863,10 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje add to all new_bases */ remove_all_subclasses(type, old_bases); res = add_all_subclasses(type, new_bases); + types_stop_world(); update_all_slots(type); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); } else { res = 0; @@ 
@@ -1912,13 +1919,13 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure))
 {
     PyTypeObject *type = PyTypeObject_CAST(tp);
     PyTypeObject *best_base;
-    if (type_check_new_bases(type, new_bases, &best_base) < 0) {
-        return -1;
-    }
     int res;
-    types_stop_world();
-    res = type_set_bases_world_stopped(type, new_bases, best_base);
-    types_start_world();
+    BEGIN_TYPE_LOCK();
+    res = type_check_new_bases(type, new_bases, &best_base);
+    if (res == 0) {
+        res = type_set_bases_unlocked(type, new_bases, best_base);
+    }
+    END_TYPE_LOCK();
     return res;
 }
 
@@ -3158,17 +3165,12 @@ tail_contains(PyObject *tuple, Py_ssize_t whence, PyObject *o)
 static PyObject *
 class_name(PyObject *cls)
 {
-#ifdef Py_GIL_DISABLED
-    // Avoid re-entrancy and use tp_name.  This is only used for the
-    // mro_implementation() sanity check of the computed mro.
-    return type_name(cls, NULL);
-#else
     PyObject *name;
+    // Note that this is potentially re-entrant.
     if (PyObject_GetOptionalAttr(cls, &_Py_ID(__name__), &name) == 0) {
         name = PyObject_Repr(cls);
     }
     return name;
-#endif
 }
 
 static int
@@ -3333,8 +3335,10 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size)
 }
 
 static PyObject *
-mro_implementation(PyTypeObject *type)
+mro_implementation_unlocked(PyTypeObject *type)
 {
+    ASSERT_TYPE_LOCK_HELD();
+
     if (!_PyType_IsReady(type)) {
         if (PyType_Ready(type) < 0)
             return NULL;
@@ -3414,6 +3418,16 @@ mro_implementation(PyTypeObject *type)
     return result;
 }
 
+static PyObject *
+mro_implementation(PyTypeObject *type)
+{
+    PyObject *mro;
+    BEGIN_TYPE_LOCK();
+    mro = mro_implementation_unlocked(type);
+    END_TYPE_LOCK();
+    return mro;
+}
+
 /*[clinic input]
 type.mro
 
@@ -3479,44 +3493,30 @@ mro_check(PyTypeObject *type, PyObject *mro)
       - through a finalizer of the return value of mro().
 */
 static PyObject *
-mro_invoke(PyTypeObject *type, bool world_stopped)
+mro_invoke(PyTypeObject *type)
 {
     PyObject *mro_result;
     PyObject *new_mro;
+    ASSERT_TYPE_LOCK_HELD();
+
     const int custom = !Py_IS_TYPE(type, &PyType_Type);
 
     if (custom) {
         // Custom mro() method on metaclass.  This is potentially re-entrant.
         // We are called either from type_ready() or from type_set_bases().
-        if (world_stopped) {
-            // Called for type_set_bases(), re-assigment of __bases__
-            types_start_world();
-        }
         mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro));
-        if (mro_result != NULL) {
-            new_mro = PySequence_Tuple(mro_result);
-            Py_DECREF(mro_result);
-        }
-        else {
-            new_mro = NULL;
-        }
-        if (world_stopped) {
-            types_stop_world();
-        }
     }
     else {
         // In this case, the mro() method on the type object is being used and
        // we know that these calls are not re-entrant.
-        mro_result = mro_implementation(type);
-        if (mro_result != NULL) {
-            new_mro = PySequence_Tuple(mro_result);
-            Py_DECREF(mro_result);
-        }
-        else {
-            new_mro = NULL;
-        }
+        mro_result = mro_implementation_unlocked(type);
     }
 
+    if (mro_result == NULL)
+        return NULL;
+
+    new_mro = PySequence_Tuple(mro_result);
+    Py_DECREF(mro_result);
     if (new_mro == NULL) {
         return NULL;
     }
@@ -3557,15 +3557,9 @@ mro_invoke(PyTypeObject *type, bool world_stopped)
      - Returns -1 in case of an error.
 */
 static int
-mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_stopped)
+mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro)
 {
-#ifdef Py_DEBUG
-    if (!world_stopped) {
-        assert(!TYPE_IS_REVEALED(type));
-    } else {
-        assert(types_world_is_stopped());
-    }
-#endif
+    ASSERT_TYPE_LOCK_HELD();
 
     PyObject *new_mro, *old_mro;
     int reent;
@@ -3574,7 +3568,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_s
        Don't let old_mro be GC'ed and its address be reused for
        another object, like (suddenly!) a new tp_mro.  */
     old_mro = Py_XNewRef(lookup_tp_mro(type));
-    new_mro = mro_invoke(type, world_stopped);  /* might cause reentrance */
+    new_mro = mro_invoke(type);  /* might cause reentrance */
     reent = (lookup_tp_mro(type) != old_mro);
     Py_XDECREF(old_mro);
     if (new_mro == NULL) {
@@ -3588,14 +3582,14 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_s
 
     set_tp_mro(type, new_mro, initial);
 
-    type_mro_modified(type, new_mro, world_stopped);
+    type_mro_modified(type, new_mro);
     /* corner case: the super class might have been hidden
        from the custom MRO */
-    type_mro_modified(type, lookup_tp_bases(type), world_stopped);
+    type_mro_modified(type, lookup_tp_bases(type));
 
     // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE?
     if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) {
-        type_modified_unlocked(type, world_stopped);
+        type_modified_unlocked(type);
     }
     else {
         /* For static builtin types, this is only called during init
@@ -5535,6 +5529,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
     }
 
     PyObject *res = NULL;
+    BEGIN_TYPE_LOCK();
 
     PyObject *mro = lookup_tp_mro(type);
     // The type must be ready
@@ -5561,6 +5556,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def)
             break;
         }
     }
+    END_TYPE_LOCK();
 
     if (res == NULL) {
         PyErr_Format(
@@ -5770,14 +5766,21 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio
 }
 
 static void
-maybe_update_cache(struct type_cache_entry *entry, PyObject *name,
-                   unsigned int version_tag, PyObject *value)
+maybe_update_cache(PyTypeObject *type, struct type_cache_entry *entry,
+                   PyObject *name, unsigned int version_tag, PyObject *value)
 {
     if (version_tag == 0) {
         return;  // 0 is not a valid version
     }
+    // Calling find_name_in_mro() might cause the type version to change.
+    // For example, if a __hash__ or __eq__ method mutates the types.
+    // This case is expected to be rare but we check for it here and avoid
+    // replacing a valid cache entry with a known to be stale one.
+    if (FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag) != version_tag) {
+        return;  // version changed during lookup
+    }
     PyObject *old_value;
-#if Py_GIL_DISABLED
+#ifdef Py_GIL_DISABLED
     _PySeqLock_LockWrite(&entry->sequence);
 
     // update the entry
@@ -5821,9 +5824,8 @@ _PyTypes_AfterFork(void)
 static unsigned int
 type_assign_version(PyTypeObject *type)
 {
-    unsigned int version = FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag);
+    unsigned int version = type->tp_version_tag;
     if (version == 0) {
-        BEGIN_TYPE_LOCK();
         PyInterpreterState *interp = _PyInterpreterState_GET();
         if (assign_version_tag(interp, type)) {
             version = type->tp_version_tag;
@@ -5831,7 +5833,6 @@ type_assign_version(PyTypeObject *type)
         else {
             version = 0;
         }
-        END_TYPE_LOCK();
     }
     return version;
 }
@@ -5885,17 +5886,14 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
     assert(!PyErr_Occurred());
 
     PyObject *res;
+    int error;
     unsigned int assigned_version = 0;  // 0 is not a valid version
+    BEGIN_TYPE_LOCK();
     if (MCACHE_CACHEABLE_NAME(name)) {
         assigned_version = type_assign_version(type);
     }
-    // Calling find_name_in_mro() might cause the type version to change. For
-    // example, if a __hash__ or __eq__ method mutates the types.  Since this
-    // case is expected to be rare, it seems better to not explicitly check
-    // for it (by checking if the version changed) and to do the useless cache
-    // update anyhow.
-    int error;
     res = find_name_in_mro(type, name, &error);
+    END_TYPE_LOCK();
 
     /* Only put NULL results into cache if there was no error. */
     if (error) {
@@ -5913,7 +5911,7 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
         *out = PyStackRef_NULL;
         return 0;
     }
-    maybe_update_cache(entry, name, assigned_version, res);
+    maybe_update_cache(type, entry, name, assigned_version, res);
     *out = res ? PyStackRef_FromPyObjectSteal(res) : PyStackRef_NULL;
     return assigned_version;
 }
@@ -6177,7 +6175,7 @@ _Py_type_getattro(PyObject *tp, PyObject *name)
 // the type versions.
 static int
 type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name,
-                 PyObject *value, PyObject **old_value, bool world_stopped)
+                 PyObject *value, PyObject **old_value)
 {
     // We don't want any re-entrancy between when we update the dict
     // and call type_modified_unlocked, including running the destructor
@@ -6189,19 +6187,20 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name,
         return -1;
     }
 
-    if (_PyDict_SetItem_LockHeld(dict, name, value) < 0) {
-        PyErr_Format(PyExc_AttributeError,
-                     "type object '%.50s' has no attribute '%U'",
-                     ((PyTypeObject*)type)->tp_name, name);
-        return -2;
-    }
-
     /* Clear the VALID_VERSION flag of 'type' and all its subclasses.  This
        could possibly be unified with the update_subclasses() recursion
       in update_slot(), but carefully:
       they each have their own conditions on
       which to stop recursing into subclasses. */
-    type_modified_unlocked(type, world_stopped);
+    type_modified_unlocked(type);
+
+    if (_PyDict_SetItem_LockHeld(dict, name, value) < 0) {
+        PyErr_Format(PyExc_AttributeError,
+                     "type object '%.50s' has no attribute '%U'",
+                     ((PyTypeObject*)type)->tp_name, name);
+        _PyObject_SetAttributeErrorContext((PyObject *)type, name);
+        return -1;
+    }
 
     return 0;
 }
@@ -6263,45 +6262,30 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
         // This is an unlikely case.  PyType_Ready has not yet been done and
         // we need to initialize tp_dict.  We don't just do PyType_Ready
         // because we could already be readying.
-        types_stop_world();
+        BEGIN_TYPE_LOCK();
         if (type->tp_dict == NULL) {
             dict = type->tp_dict = PyDict_New();
         }
-        types_start_world();
+        END_TYPE_LOCK();
         if (dict == NULL) {
             res = -1;
             goto done;
         }
     }
 
-    if (is_dunder_name(name) && has_slotdef(name)) {
-        // The name corresponds to a type slot.
-        types_stop_world();
-        // Since the world is stopped, we actually don't need this critical
-        // section.  However, this internally calls dict methods that assert
-        // that the dict object mutex is held.  So, we acquire it here to
-        // avoid those assert failing.
-        Py_BEGIN_CRITICAL_SECTION(dict);
-        res = type_update_dict(type, (PyDictObject *)dict, name, value,
-                               &old_value, true);
-        if (res == 0) {
+    BEGIN_TYPE_DICT_LOCK(dict);
+    res = type_update_dict(type, (PyDictObject *)dict, name, value,
+                           &old_value);
+    if (res == 0) {
+        if (is_dunder_name(name) && has_slotdef(name)) {
+            // The name corresponds to a type slot.
+            types_stop_world();
             res = update_slot(type, name);
+            types_start_world();
+            ASSERT_TYPE_LOCK_HELD();
         }
-        Py_END_CRITICAL_SECTION();
-        types_start_world();
-    } else {
-        // The name is not a slot.
-        BEGIN_TYPE_DICT_LOCK(dict);
-        res = type_update_dict(type, (PyDictObject *)dict, name, value,
-                               &old_value, false);
-        END_TYPE_DICT_LOCK();
-    }
-    if (res == -2) {
-        // We handle the attribute error case here because reentrancy possible
-        // when calling this function.
-        _PyObject_SetAttributeErrorContext((PyObject *)type, name);
-        res = -1;
     }
+    END_TYPE_DICT_LOCK();
 
     assert(res < 0 || _PyType_CheckConsistency(type));
 
 done:
@@ -8631,7 +8615,7 @@ type_ready_preheader(PyTypeObject *type)
 static int
 type_ready_mro(PyTypeObject *type, int initial)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_TYPE_LOCK_HELD();
 
     if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
         if (!initial) {
@@ -8642,7 +8626,7 @@ type_ready_mro(PyTypeObject *type, int initial)
     }
 
     /* Calculate method resolution order */
-    if (mro_internal(type, initial, NULL, false) < 0) {
+    if (mro_internal(type, initial, NULL) < 0) {
         return -1;
     }
     PyObject *mro = lookup_tp_mro(type);
@@ -8706,7 +8690,7 @@ inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) {
 static int
 type_ready_inherit(PyTypeObject *type)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_TYPE_LOCK_HELD();
 
     /* Inherit special flags from dominant base */
     PyTypeObject *base = type->tp_base;
@@ -8903,7 +8887,7 @@ type_ready_post_checks(PyTypeObject *type)
 static int
 type_ready(PyTypeObject *type, int initial)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_TYPE_LOCK_HELD();
 
     _PyObject_ASSERT((PyObject *)type, !is_readying(type));
     start_readying(type);
@@ -8995,12 +8979,14 @@ PyType_Ready(PyTypeObject *type)
     }
 
     int res;
+    BEGIN_TYPE_LOCK();
     if (!(type->tp_flags & Py_TPFLAGS_READY)) {
         res = type_ready(type, 1);
     } else {
         res = 0;
         assert(_PyType_CheckConsistency(type));
     }
+    END_TYPE_LOCK();
     return res;
 }
 
@@ -9034,7 +9020,9 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self,
     managed_static_type_state_init(interp, self, isbuiltin, initial);
 
     int res;
+    BEGIN_TYPE_LOCK();
     res = type_ready(self, initial);
+    END_TYPE_LOCK();
     if (res < 0) {
         _PyStaticType_ClearWeakRefs(interp, self);
         managed_static_type_state_clear(interp, self, isbuiltin, initial);
@@ -9485,13 +9473,12 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped)
    https://mail.python.org/pipermail/python-dev/2003-April/034535.html
    */
 static int
-hackcheck(PyObject *self, setattrofunc func, const char *what)
+hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what)
 {
-    if (!PyType_Check(self)) {
-        return 1;
-    }
     PyTypeObject *type = Py_TYPE(self);
 
+    ASSERT_TYPE_LOCK_HELD();
+
     PyObject *mro = lookup_tp_mro(type);
     if (!mro) {
         /* Probably ok not to check the call in this case. */
@@ -9533,6 +9520,20 @@ hackcheck(PyObject *self, setattrofunc func, const char *what)
     return 1;
 }
 
+static int
+hackcheck(PyObject *self, setattrofunc func, const char *what)
+{
+    if (!PyType_Check(self)) {
+        return 1;
+    }
+
+    int res;
+    BEGIN_TYPE_LOCK();
+    res = hackcheck_unlocked(self, func, what);
+    END_TYPE_LOCK();
+    return res;
+}
+
 static PyObject *
 wrap_setattr(PyObject *self, PyObject *args, void *wrapped)
 {
@@ -10680,7 +10681,7 @@ slot_bf_getbuffer(PyObject *self, Py_buffer *buffer, int flags)
 }
 
 static releasebufferproc
-type_lookup_base_releasebuffer(PyObject *self, Py_buffer *buffer)
+releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer)
 {
     PyTypeObject *self_type = Py_TYPE(self);
     PyObject *mro = lookup_tp_mro(self_type);
@@ -10722,7 +10723,9 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer)
 {
     releasebufferproc base_releasebuffer;
 
-    base_releasebuffer = type_lookup_base_releasebuffer(self, buffer);
+    BEGIN_TYPE_LOCK();
+    base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer);
+    END_TYPE_LOCK();
 
     if (base_releasebuffer != NULL) {
         base_releasebuffer(self, buffer);
@@ -11459,7 +11462,7 @@ update_all_slots(PyTypeObject* type)
     }
 
     /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */
-    type_modified_unlocked(type, true);
+    type_modified_unlocked(type);
 }
 
@@ -11730,10 +11733,13 @@ PyType_Freeze(PyTypeObject *type)
         return -1;
     }
 
+    BEGIN_TYPE_LOCK();
     types_stop_world();
     type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE);
-    type_modified_unlocked(type, true);
     types_start_world();
+    ASSERT_TYPE_LOCK_HELD();
+    type_modified_unlocked(type);
+    END_TYPE_LOCK();
 
     return 0;
 }
@@ -11798,8 +11804,14 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
     PyObject *mro, *res;
     Py_ssize_t i, n;
 
+    BEGIN_TYPE_LOCK();
     mro = lookup_tp_mro(su_obj_type);
+    /* keep a strong reference to mro because su_obj_type->tp_mro can be
+       replaced during PyDict_GetItemRef(dict, name, &res) and because
+       another thread can modify it after we end the critical section
+       below */
     Py_XINCREF(mro);
+    END_TYPE_LOCK();
 
     if (mro == NULL)
         return NULL;

From 986f23a7e959a3037a500a6c5c1d0e0fea0a9da7 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Tue, 22 Apr 2025 17:14:07 -0700
Subject: [PATCH 40/45] Fix non-debug build.

---
 Objects/typeobject.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 0eaff5009ff092..e2d33af1bc7fa2 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -102,6 +102,7 @@ types_world_is_stopped(void)
 #define TYPE_IS_REVEALED(tp) 0
 #endif
 
+#ifdef Py_DEBUG
 #define ASSERT_TYPE_LOCK_HELD() \
     if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); }
 
@@ -111,6 +112,11 @@ types_world_is_stopped(void)
 #define ASSERT_NEW_OR_LOCKED(tp) \
     if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); }
 
+#else
+#define ASSERT_TYPE_LOCK_HELD()
+#define ASSERT_NEW_OR_STOPPED(tp)
+#define ASSERT_NEW_OR_LOCKED(tp)
+#endif
 
 static void
 types_stop_world(void)

From c404ed4b19c3b85e82c1b8e6812940e96a6e5a40 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Wed, 23 Apr 2025 12:52:16 -0700
Subject: [PATCH 41/45] Revert unneeded code changes.
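
The tp_dict initialization now re-reads type->tp_dict after taking
TYPE_LOCK, so two threads racing to lazily create the dict cannot both
install one.  This is the usual check-under-the-lock pattern; a
minimal standalone model of it (illustrative names, not the CPython
code):

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static void *shared_dict;  /* stands in for type->tp_dict */

    /* The NULL check must happen while the lock is held; otherwise two
       racing threads could each observe NULL and each install a new
       dict, losing one of the allocations. */
    static void *
    get_or_create_dict(void)
    {
        pthread_mutex_lock(&lock);
        void *dict = shared_dict;
        if (dict == NULL) {
            dict = shared_dict = calloc(1, 64);  /* placeholder alloc */
        }
        pthread_mutex_unlock(&lock);
        return dict;
    }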
---
 Objects/typeobject.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index e2d33af1bc7fa2..d591c5873f82e6 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -6269,7 +6269,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
         // we need to initialize tp_dict.  We don't just do PyType_Ready
         // because we could already be readying.
         BEGIN_TYPE_LOCK();
-        if (type->tp_dict == NULL) {
+        dict = type->tp_dict;
+        if (dict == NULL) {
             dict = type->tp_dict = PyDict_New();
         }
         END_TYPE_LOCK();
@@ -6280,8 +6281,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
     }
 
     BEGIN_TYPE_DICT_LOCK(dict);
-    res = type_update_dict(type, (PyDictObject *)dict, name, value,
-                           &old_value);
+    res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value);
+    assert(_PyType_CheckConsistency(type));
     if (res == 0) {
         if (is_dunder_name(name) && has_slotdef(name)) {
             // The name corresponds to a type slot.
@@ -6292,7 +6293,6 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
         }
     }
     END_TYPE_DICT_LOCK();
-    assert(res < 0 || _PyType_CheckConsistency(type));
 
 done:
     Py_DECREF(name);

From 0eb77da532b67d760c76213228e815a395453be3 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Wed, 23 Apr 2025 17:00:04 -0700
Subject: [PATCH 42/45] Restore comment

---
 Objects/typeobject.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index d591c5873f82e6..fbb57f96102392 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5891,6 +5891,9 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
     /* We may end up clearing live exceptions below, so make sure it's ours. */
     assert(!PyErr_Occurred());
 
+    // We need to atomically do the lookup and capture the version before
+    // anyone else can modify our mro or mutate the type.
+
     PyObject *res;
     int error;
     unsigned int assigned_version = 0;  // 0 is not a valid version

From 16f15b2e3b62c8c95694796293927bfbbbacf183 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Wed, 23 Apr 2025 17:15:45 -0700
Subject: [PATCH 43/45] Revert more changes.

These are not required as part of this PR and can be reviewed
separately.
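
This keeps the find_name_in_mro() call and the version-tag capture
inside a single TYPE_LOCK critical section, instead of checking
afterwards whether the version moved.  The invariant is that the
cached (value, version) pair must come from one atomic snapshot; a
minimal standalone model of that idea (illustrative names, not the
CPython code):

    #include <pthread.h>

    static pthread_mutex_t type_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned int type_version;  /* bumped on every type mutation */
    static int attribute;              /* stands in for the MRO lookup result */

    /* Read the attribute and the version that produced it in the same
       critical section.  If these were two separate lock acquisitions,
       a mutation could land in between and the pair would no longer be
       consistent, which is exactly the stale-cache hazard. */
    static void
    lookup_with_version(int *value_out, unsigned int *version_out)
    {
        pthread_mutex_lock(&type_lock);
        *value_out = attribute;
        *version_out = type_version;
        pthread_mutex_unlock(&type_lock);
    }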
---
 Objects/typeobject.c | 128 ++++++++++++++++++------------------------
 1 file changed, 52 insertions(+), 76 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index fbb57f96102392..e088f11112854d 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1156,14 +1156,6 @@ type_modified_unlocked(PyTypeObject *type)
         }
     }
 
-    set_version_unlocked(type, 0);  /* 0 is not a valid version tag */
-    if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
-        // This field *must* be invalidated if the type is modified (see the
-        // comment on struct _specialization_cache):
-        FT_ATOMIC_STORE_PTR_RELAXED(
-            ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
-    }
-
     // Notify registered type watchers, if any
     if (type->tp_watched) {
         PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -1185,6 +1177,14 @@ type_modified_unlocked(PyTypeObject *type)
             bits >>= 1;
         }
     }
+
+    set_version_unlocked(type, 0);  /* 0 is not a valid version tag */
+    if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
+        // This field *must* be invalidated if the type is modified (see the
+        // comment on struct _specialization_cache):
+        FT_ATOMIC_STORE_PTR_RELAXED(
+            ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
+    }
 }
 
 void
@@ -1238,21 +1238,14 @@ type_mro_modified(PyTypeObject *type, PyObject *bases)
        Called from mro_internal, which will subsequently be called on
        each subclass when their mro is recursively updated.
      */
+    Py_ssize_t i, n;
     ASSERT_TYPE_LOCK_HELD();
 
-    if (!Py_IS_TYPE(type, &PyType_Type)) {
-        unsigned int meta_version = Py_TYPE(type)->tp_version_tag;
-        // This can be re-entrant.
-        bool is_custom = has_custom_mro(type);
-        if (meta_version != Py_TYPE(type)->tp_version_tag) {
-            // metaclass changed during call of has_custom_mro()
-            goto clear;
-        }
-        if (is_custom) {
-            goto clear;
-        }
+    if (!Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type)) {
+        goto clear;
     }
-    Py_ssize_t n = PyTuple_GET_SIZE(bases);
-    for (Py_ssize_t i = 0; i < n; i++) {
+    n = PyTuple_GET_SIZE(bases);
+    for (i = 0; i < n; i++) {
         PyObject *b = PyTuple_GET_ITEM(bases, i);
         PyTypeObject *cls = _PyType_CAST(b);
 
@@ -5771,22 +5764,12 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio
     return old_name;
 }
 
+#if Py_GIL_DISABLED
+
 static void
-maybe_update_cache(PyTypeObject *type, struct type_cache_entry *entry,
-                   PyObject *name, unsigned int version_tag, PyObject *value)
+update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name,
+                          unsigned int version_tag, PyObject *value)
 {
-    if (version_tag == 0) {
-        return;  // 0 is not a valid version
-    }
-    // Calling find_name_in_mro() might cause the type version to change.
-    // For example, if a __hash__ or __eq__ method mutates the types.
-    // This case is expected to be rare but we check for it here and avoid
-    // replacing a valid cache entry with a known to be stale one.
-    if (FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag) != version_tag) {
-        return;  // version changed during lookup
-    }
-    PyObject *old_value;
-#ifdef Py_GIL_DISABLED
     _PySeqLock_LockWrite(&entry->sequence);
 
     // update the entry
@@ -5798,16 +5781,16 @@ maybe_update_cache(PyTypeObject *type, struct type_cache_entry *entry,
         return;
     }
 
-    old_value = update_cache(entry, name, version_tag, value);
+    PyObject *old_value = update_cache(entry, name, version_tag, value);
 
     // Then update sequence to the next valid value
     _PySeqLock_UnlockWrite(&entry->sequence);
-#else
-    old_value = update_cache(entry, name, version_tag, value);
-#endif
+
     Py_DECREF(old_value);
 }
 
+#endif
+
 void
 _PyTypes_AfterFork(void)
 {
@@ -5825,22 +5808,23 @@ _PyTypes_AfterFork(void)
 #endif
 }
 
-// Try to assign a new type version tag, return it if successful.  Return 0
-// if no version was assigned.
-static unsigned int
-type_assign_version(PyTypeObject *type)
+/* Internal API to look for a name through the MRO.
+   This returns a strong reference, and doesn't set an exception!
+   If nonzero, version is set to the value of type->tp_version at the time of
+   the lookup.
+*/
+PyObject *
+_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
 {
-    unsigned int version = type->tp_version_tag;
-    if (version == 0) {
-        PyInterpreterState *interp = _PyInterpreterState_GET();
-        if (assign_version_tag(interp, type)) {
-            version = type->tp_version_tag;
-        }
-        else {
-            version = 0;
-        }
+    _PyStackRef out;
+    unsigned int ver = _PyType_LookupStackRefAndVersion(type, name, &out);
+    if (version) {
+        *version = ver;
     }
-    return version;
+    if (PyStackRef_IsNull(out)) {
+        return NULL;
+    }
+    return PyStackRef_AsPyObjectSteal(out);
 }
 
 unsigned int
@@ -5896,12 +5880,15 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
 
     PyObject *res;
     int error;
-    unsigned int assigned_version = 0;  // 0 is not a valid version
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    int has_version = 0;
+    unsigned int assigned_version = 0;
     BEGIN_TYPE_LOCK();
+    res = find_name_in_mro(type, name, &error);
     if (MCACHE_CACHEABLE_NAME(name)) {
-        assigned_version = type_assign_version(type);
+        has_version = assign_version_tag(interp, type);
+        assigned_version = type->tp_version_tag;
     }
-    res = find_name_in_mro(type, name, &error);
     END_TYPE_LOCK();
 
     /* Only put NULL results into cache if there was no error. */
@@ -5920,28 +5907,17 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
         *out = PyStackRef_NULL;
         return 0;
     }
-    maybe_update_cache(type, entry, name, assigned_version, res);
-    *out = res ? PyStackRef_FromPyObjectSteal(res) : PyStackRef_NULL;
-    return assigned_version;
-}
 
-/* Internal API to look for a name through the MRO.
-   This returns a strong reference, and doesn't set an exception!
-   If nonzero, version is set to the value of type->tp_version at the time of
-   the lookup.
-*/
-PyObject *
-_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
-{
-    _PyStackRef out;
-    unsigned int ver = _PyType_LookupStackRefAndVersion(type, name, &out);
-    if (version) {
-        *version = ver;
-    }
-    if (PyStackRef_IsNull(out)) {
-        return NULL;
+    if (has_version) {
+#if Py_GIL_DISABLED
+        update_cache_gil_disabled(entry, name, assigned_version, res);
+#else
+        PyObject *old_value = update_cache(entry, name, assigned_version, res);
+        Py_DECREF(old_value);
+#endif
     }
-    return PyStackRef_AsPyObjectSteal(out);
+    *out = res ? PyStackRef_FromPyObjectSteal(res) : PyStackRef_NULL;
+    return has_version ? assigned_version : 0;
 }
 
 /* Internal API to look for a name through the MRO.

From 64547e9b7fb4a9605062fdcbe026474581e0c198 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Thu, 24 Apr 2025 20:44:27 -0700
Subject: [PATCH 44/45] Reduce item list size for a few tests.

Now that these slot updates use stop-the-world, these two tests are
quite a lot slower.  Reduce size of the items list from 1000 to 100.
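
For a rough sense of the cost: each dunder write to one of these item
classes now triggers a stop-the-world pause, so a run with ITEMS = 1000
and WRITERS = 2 performs on the order of a couple thousand pauses per
loop.  Even at an illustrative ~10 microseconds per pause (an assumed
figure, not a measurement), that is tens of milliseconds of pure pause
time per loop, multiplied by LOOPS and the number of test variants;
dropping to 100 items cuts that by 10x.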
---
 Lib/test/test_opcache.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index ac132465b227ad..f06f7ce1a7d49f 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -566,6 +566,7 @@ class TestRacesDoNotCrash(TestBase):
     # Careful with these. Bigger numbers have a higher chance of catching bugs,
     # but you can also burn through a *ton* of type/dict/function versions:
     ITEMS = 1000
+    SMALL_ITEMS = 100
     LOOPS = 4
     WRITERS = 2
 
@@ -609,7 +610,7 @@ class C:
             __getitem__ = lambda self, item: None
 
         items = []
-        for _ in range(self.ITEMS):
+        for _ in range(self.SMALL_ITEMS):
             item = C()
             items.append(item)
         return items
@@ -780,7 +781,7 @@ class C:
             __getattribute__ = lambda self, name: None
 
         items = []
-        for _ in range(self.ITEMS):
+        for _ in range(self.SMALL_ITEMS):
             item = C()
             items.append(item)
         return items

From 5672352f32bf90fd1a9ff3e2a0bc0c4101c95915 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer
Date: Mon, 28 Apr 2025 13:02:52 -0700
Subject: [PATCH 45/45] Minor code tidy.
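
Rename the assertion macros so the name states exactly what is being
asserted, and replace the bare type_set_flags_with_mask() calls for the
abstract flag with type_add_flags()/type_clear_flags().  Given the
definition of type_set_flags_with_mask() in this file (new_flags =
(tp_flags & ~mask) | flags), the helpers are presumably equivalent to
the following sketch (not the CPython source):

    static void
    type_add_flags(PyTypeObject *tp, unsigned long flag)
    {
        /* mask = 0: keep all existing bits, then OR in `flag` */
        type_set_flags_with_mask(tp, 0, flag);
    }

    static void
    type_clear_flags(PyTypeObject *tp, unsigned long flag)
    {
        /* mask = flag: clear those bits, OR in nothing */
        type_set_flags_with_mask(tp, flag, 0);
    }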
---
 Objects/typeobject.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index e088f11112854d..811a27c6405ce2 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -107,15 +107,15 @@ types_world_is_stopped(void)
     if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); }
 
 // Checks if we can safely update type slots or tp_flags.
-#define ASSERT_NEW_OR_STOPPED(tp) \
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) \
     assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped())
 
-#define ASSERT_NEW_OR_LOCKED(tp) \
+#define ASSERT_NEW_TYPE_OR_LOCKED(tp) \
     if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); }
 #else
 #define ASSERT_TYPE_LOCK_HELD()
-#define ASSERT_NEW_OR_STOPPED(tp)
-#define ASSERT_NEW_OR_LOCKED(tp)
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp)
+#define ASSERT_NEW_TYPE_OR_LOCKED(tp)
 #endif
 
 static void
@@ -144,8 +144,8 @@ types_start_world(void)
 #define END_TYPE_DICT_LOCK()
 #define ASSERT_TYPE_LOCK_HELD()
 #define TYPE_IS_REVEALED(tp) 0
-#define ASSERT_NEW_OR_STOPPED(tp)
-#define ASSERT_NEW_OR_LOCKED(tp)
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp)
+#define ASSERT_NEW_TYPE_OR_LOCKED(tp)
 #define types_world_is_stopped() 1
 #define types_stop_world()
 #define types_start_world()
@@ -411,14 +411,14 @@ _PyStaticType_GetBuiltins(void)
 static void
 type_set_flags(PyTypeObject *tp, unsigned long flags)
 {
-    ASSERT_NEW_OR_STOPPED(tp);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp);
     tp->tp_flags = flags;
 }
 
 static void
 type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags)
 {
-    ASSERT_NEW_OR_STOPPED(tp);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp);
     unsigned long new_flags = (tp->tp_flags & ~mask) | flags;
     type_set_flags(tp, new_flags);
 }
@@ -556,7 +556,7 @@ static inline void
 set_tp_bases(PyTypeObject *self, PyObject *bases, int initial)
 {
     assert(PyTuple_Check(bases));
-    ASSERT_NEW_OR_LOCKED(self);
+    ASSERT_NEW_TYPE_OR_LOCKED(self);
     if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
         // XXX tp_bases can probably be statically allocated for each
         // static builtin type.
@@ -601,7 +601,7 @@ clear_tp_bases(PyTypeObject *self, int final)
 static inline PyObject *
 lookup_tp_mro(PyTypeObject *self)
 {
-    ASSERT_NEW_OR_LOCKED(self);
+    ASSERT_NEW_TYPE_OR_LOCKED(self);
     return self->tp_mro;
 }
 
@@ -1133,7 +1133,7 @@ type_modified_unlocked(PyTypeObject *type)
        We don't assign new version tags eagerly, but only as
        needed.
      */
-    ASSERT_NEW_OR_LOCKED(type);
+    ASSERT_NEW_TYPE_OR_LOCKED(type);
     if (type->tp_version_tag == 0) {
         return;
     }
@@ -1640,9 +1640,9 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)
     type_modified_unlocked(type);
     types_stop_world();
     if (abstract)
-        type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT);
+        type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT);
     else
-        type_set_flags_with_mask(type, Py_TPFLAGS_IS_ABSTRACT, 0);
+        type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT);
     types_start_world();
     ASSERT_TYPE_LOCK_HELD();
     END_TYPE_LOCK();
@@ -1767,7 +1767,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp)
 static int
 type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **best_base)
 {
-    // Check arguments, this re-entrant due to the PySys_Audit() call
+    // Check arguments, this is re-entrant due to the PySys_Audit() call
     if (!check_set_special_type_attr(type, new_bases, "__bases__")) {
         return -1;
     }
@@ -11252,7 +11252,7 @@ has_slotdef(PyObject *name)
 static pytype_slotdef *
 update_one_slot(PyTypeObject *type, pytype_slotdef *p)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(type);
 
     PyObject *descr;
     PyWrapperDescrObject *d;
@@ -11375,7 +11375,7 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p)
 static int
 update_slots_callback(PyTypeObject *type, void *data)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(type);
 
     pytype_slotdef **pp = (pytype_slotdef **)data;
     for (; *pp; pp++) {