diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 8314fed80fa512..3c9ef84de61039 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1840,7 +1840,7 @@ are always available. They are listed here in alphabetical order. Slice objects are now :term:`hashable` (provided :attr:`~slice.start`, :attr:`~slice.stop`, and :attr:`~slice.step` are hashable). -.. function:: sorted(iterable, /, *, key=None, reverse=False) +.. function:: sorted(iterable, /, *, key=None, keylist=None, reverse=False) Return a new sorted list from the items in *iterable*. @@ -1850,6 +1850,10 @@ are always available. They are listed here in alphabetical order. key from each element in *iterable* (for example, ``key=str.lower``). The default value is ``None`` (compare the elements directly). + As an alternative to a key function, a :class:`list` of keys can be passed + as the *keylist* argument; it determines the sort order of the items. + The provided :class:`list` is itself sorted in place (that is, it is modified). + *reverse* is a boolean value. If set to ``True``, then the list elements are sorted as if each comparison were reversed. @@ -1872,6 +1876,11 @@ are always available. They are listed here in alphabetical order. For sorting examples and a brief sorting tutorial, see :ref:`sortinghowto`. + .. versionchanged:: 3.15 + + Added the *keylist* argument. + + .. decorator:: staticmethod Transform a method into a static method. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index a87898dadf4af6..3d3ac5bb4da61c 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1394,7 +1394,7 @@ application). :ref:`mutable <typesseq-mutable>` sequence operations. Lists also provide the following additional method: - .. method:: list.sort(*, key=None, reverse=False) + .. method:: list.sort(*, key=None, keylist=None, reverse=False) This method sorts the list in place, using only ``<`` comparisons between items. 
Exceptions are not suppressed - if any comparison operations @@ -1414,6 +1414,10 @@ application). The :func:`functools.cmp_to_key` utility is available to convert a 2.x style *cmp* function to a *key* function. + As an alternative to a key function, a :class:`list` of keys can be passed + as the *keylist* argument; it determines the sort order of the items. + The provided :class:`list` is itself sorted in place (that is, it is modified). + *reverse* is a boolean value. If set to ``True``, then the list elements are sorted as if each comparison were reversed. @@ -1436,6 +1440,11 @@ application). list appear empty for the duration, and raises :exc:`ValueError` if it can detect that the list has been mutated during a sort. + The same applies to the *keylist* argument. + + .. versionchanged:: 3.15 + + Added the *keylist* argument. .. _typesseq-tuple: diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 4882ddb4310fc2..4f67cd3fcbe55b 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -424,6 +424,13 @@ argparse default to ``True``. This enables suggestions for mistyped arguments by default. (Contributed by Jakob Schluse in :gh:`140450`.) +builtins +-------- +* :func:`sorted` and :meth:`list.sort` now accept a *keylist* argument: + a :class:`list` of keys by which the items are sorted. + The *keylist* list is itself sorted in place (i.e. it is modified). + (Contributed by Dominykas Grigonis in :gh:`142105`.) 
+ calendar -------- diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 783747d1f01580..8191d7df3d0734 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1838,6 +1838,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keepends)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(key)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keyfile)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keylist)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keys)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kind)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 374617d8284b48..4e213ef7ece4af 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -561,6 +561,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(keepends) STRUCT_FOR_ID(key) STRUCT_FOR_ID(keyfile) + STRUCT_FOR_ID(keylist) STRUCT_FOR_ID(keys) STRUCT_FOR_ID(kind) STRUCT_FOR_ID(kw) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index a66c97f7f13677..b167033037c8d5 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1836,6 +1836,7 @@ extern "C" { INIT_ID(keepends), \ INIT_ID(key), \ INIT_ID(keyfile), \ + INIT_ID(keylist), \ INIT_ID(keys), \ INIT_ID(kind), \ INIT_ID(kw), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 2061b1d204951d..24a9fd3de23467 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2024,6 +2024,10 @@ 
_PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(keylist); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(keys); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/test/test_sort.py b/Lib/test/test_sort.py index 2a7cfb7affaa21..772e352c530522 100644 --- a/Lib/test/test_sort.py +++ b/Lib/test/test_sort.py @@ -407,5 +407,70 @@ def test_none_in_tuples(self): #============================================================================== +class TestKeylist(unittest.TestCase): + def test_exclusivity_with_key(self): + msg = 'Only one of key and keylist can be provided.' + with self.assertRaisesRegex(ValueError, msg): + [].sort(key=1, keylist=1) + + def test_argtype(self): + for arg in [1, (), iter(())]: + msg = f"'{type(arg).__name__}' object is not a list" + with self.assertRaisesRegex(TypeError, msg): + [].sort(keylist=arg) + + def test_unequal_sizes(self): + msg = 'Lengths of input list and keylist differ.' 
+ for arg in [[1, 2], [1, 2, 3, 4]]: + with self.assertRaisesRegex(ValueError, msg): + [1, 2, 3].sort(keylist=arg) + + def test_empty(self): + data = [] + keylist = [] + data.sort(keylist=keylist) + self.assertEqual(data, []) + self.assertEqual(keylist, []) + + def test_keylist_vs_key(self): + for reverse in [False, True]: + data = list(range(10)) + # NOTE: BORLAND32-RNG-LIKE + keyfunc = lambda x: ((22695477 * x + 1) % 2**32) % 10 + keylist = list(map(keyfunc, data)) + res_keyfunc = sorted(data, key=keyfunc, reverse=reverse) + res_keylist = sorted(data, keylist=keylist, reverse=reverse) + self.assertEqual(res_keyfunc, res_keylist) + + def test_mutability_plus(self): + for reverse in [False, True]: + for size in [10, 100, 1000]: + data = list(range(size)) + # NOTE: BORLAND32-RNG-LIKE + keyfunc = lambda x: ((22695477 * x + 1) % 2**32) % size + keylist = list(map(keyfunc, data)) + orig_keylist = list(keylist) + + expected_keylist = sorted(keylist, reverse=reverse) + result = sorted(data, keylist=keylist, reverse=reverse) + self.assertEqual(keylist, expected_keylist) + + # And for completeness check the result + rge = range(len(keylist)) + idxs = sorted(rge, key=orig_keylist.__getitem__, reverse=reverse) + expected_result = [data[i] for i in idxs] + self.assertEqual(result, expected_result) + + def test_mid_failure(self): + values = list(range(5)) + keylist = [2, 1, 3, 0, None] + with self.assertRaises(TypeError): + values.sort(keylist=keylist) + + expected_values = sorted(range(4), keylist=[2, 1, 3, 0]) + self.assertEqual(values, expected_values + [4]) + self.assertEqual(keylist, [0, 1, 2, 3, None]) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-30-10-30-14.gh-issue-142105.gL68B-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-30-10-30-14.gh-issue-142105.gL68B-.rst new file mode 100644 index 00000000000000..653fc90f1517c9 --- /dev/null +++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-30-10-30-14.gh-issue-142105.gL68B-.rst @@ -0,0 +1 @@ +:func:`sorted` and :meth:`list.sort` now accept a *keylist* argument: a :class:`list` of keys by which the items are sorted. The *keylist* list is itself sorted in place (i.e. it is modified). diff --git a/Objects/clinic/listobject.c.h b/Objects/clinic/listobject.c.h index 26ba5b954336da..96187bb5c48583 100644 --- a/Objects/clinic/listobject.c.h +++ b/Objects/clinic/listobject.c.h @@ -195,7 +195,7 @@ list_pop(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } PyDoc_STRVAR(list_sort__doc__, -"sort($self, /, *, key=None, reverse=False)\n" +"sort($self, /, *, key=None, keylist=None, reverse=False)\n" "--\n" "\n" "Sort the list in ascending order and return None.\n" @@ -206,13 +206,19 @@ PyDoc_STRVAR(list_sort__doc__, "If a key function is given, apply it once to each list item and sort them,\n" "ascending or descending, according to their function values.\n" "\n" -"The reverse flag can be set to sort in descending order."); +"Alternative to key function is supplying a list to keylist argument,\n" +"which will determine sort order and will be modified in place.\n" +"\n" +"The reverse flag can be set to sort in descending order.\n" +"\n" +"Both key and keylist can not be used at the same time."); #define LIST_SORT_METHODDEF \ {"sort", _PyCFunction_CAST(list_sort), METH_FASTCALL|METH_KEYWORDS, list_sort__doc__}, static PyObject * -list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse); +list_sort_impl(PyListObject *self, PyObject *keyfunc, PyObject *keylist, + int reverse); static PyObject * list_sort(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -220,7 +226,7 @@ list_sort(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head 
_this_is_not_used; PyObject_VAR_HEAD @@ -229,7 +235,7 @@ list_sort(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(key), &_Py_ID(reverse), }, + .ob_item = { &_Py_ID(key), &_Py_ID(keylist), &_Py_ID(reverse), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -238,16 +244,17 @@ list_sort(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"key", "reverse", NULL}; + static const char * const _keywords[] = {"key", "keylist", "reverse", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sort", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; PyObject *keyfunc = Py_None; + PyObject *keylist = Py_None; int reverse = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, @@ -264,13 +271,19 @@ list_sort(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn goto skip_optional_kwonly; } } - reverse = PyObject_IsTrue(args[1]); + if (args[1]) { + keylist = args[1]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + reverse = PyObject_IsTrue(args[2]); if (reverse < 0) { goto exit; } skip_optional_kwonly: Py_BEGIN_CRITICAL_SECTION(self); - return_value = list_sort_impl((PyListObject *)self, keyfunc, reverse); + return_value = list_sort_impl((PyListObject *)self, keyfunc, keylist, reverse); Py_END_CRITICAL_SECTION(); exit: @@ -468,4 +481,4 @@ list___reversed__(PyObject *self, PyObject *Py_UNUSED(ignored)) { return list___reversed___impl((PyListObject *)self); } -/*[clinic end generated code: output=ae13fc2b56dc27c2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=08c07d813f75a628 input=a9049054013a1b77]*/ diff --git 
a/Objects/listobject.c b/Objects/listobject.c index 1722ea60cdc68f..e9740cf3989c3c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2870,6 +2870,48 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); } +#define DISABLE_LIST(self, saved_ob_size, saved_ob_item, saved_allocated) \ + do { \ + saved_ob_size = Py_SIZE(self); \ + saved_ob_item = self->ob_item; \ + saved_allocated = self->allocated; \ + Py_SET_SIZE(self, 0); \ + FT_ATOMIC_STORE_PTR_RELEASE(self->ob_item, NULL); \ + self->allocated = -1; /* any operation will reset it to >= 0 */ \ + } while (0) + +#ifdef Py_GIL_DISABLED + #define _REENABLE_LIST_GIL_PART(self, use_qsbr) \ + do { \ + ensure_shared_on_resize(self); \ + use_qsbr = _PyObject_GC_IS_SHARED(self); \ + } while (0) +#else + #define _REENABLE_LIST_GIL_PART(self, use_qsbr) \ + do { \ + use_qsbr = false; \ + } while (0) +#endif + +#define REENABLE_LIST(self, saved_ob_size, saved_ob_item, saved_allocated) \ + do { \ + final_ob_item = self->ob_item; \ + i = Py_SIZE(self); \ + Py_SET_SIZE(self, saved_ob_size); \ + FT_ATOMIC_STORE_PTR_RELEASE(self->ob_item, saved_ob_item); \ + FT_ATOMIC_STORE_SSIZE_RELAXED(self->allocated, saved_allocated); \ + if (final_ob_item != NULL) { \ + /* we cannot use list_clear() for this because it does not */ \ + /* guarantee that the list is really empty when it returns */ \ + while (--i >= 0) { \ + Py_XDECREF(final_ob_item[i]); \ + } \ + bool use_qsbr; \ + _REENABLE_LIST_GIL_PART(self, use_qsbr); \ + free_list_items(final_ob_item, use_qsbr); \ + } \ + } while (0) + /* An adaptive, stable, natural mergesort. See listsort.txt. * Returns Py_None on success, NULL on error. Even in case of error, the * list will be some permutation of its input state (nothing is lost or @@ -2882,6 +2924,7 @@ list.sort * key as keyfunc: object = None + keylist: object = None reverse: bool = False Sort the list in ascending order and return None. 
@@ -2892,12 +2935,18 @@ order of two equal elements is maintained). If a key function is given, apply it once to each list item and sort them, ascending or descending, according to their function values. +Alternative to key function is supplying a list to keylist argument, +which will determine sort order and will be modified in place. + The reverse flag can be set to sort in descending order. + +Both key and keylist can not be used at the same time. [clinic start generated code]*/ static PyObject * -list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) -/*[clinic end generated code: output=57b9f9c5e23fbe42 input=e4f6b6069181ad7d]*/ +list_sort_impl(PyListObject *self, PyObject *keyfunc, PyObject *keylist, + int reverse) +/*[clinic end generated code: output=ebb99a3e19f35128 input=5d44a191b821cc9a]*/ { MergeState ms; Py_ssize_t nremaining; @@ -2909,30 +2958,41 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) PyObject *result = NULL; /* guilty until proved innocent */ Py_ssize_t i; PyObject **keys; + // keylist vars + PyListObject *keylist_lob; + Py_ssize_t keylist_ob_size = -1; + Py_ssize_t keylist_allocated = -1; + PyObject **keylist_ob_item = NULL; assert(self != NULL); assert(PyList_Check(self)); if (keyfunc == Py_None) keyfunc = NULL; + if (keylist == Py_None) { + keylist = NULL; + } + else if (keylist != NULL) { + if (keyfunc != NULL) { + PyErr_SetString(PyExc_ValueError, + "Only one of key and keylist can be provided."); + return result; + } + } + /* The list is temporarily made empty, so that mutations performed * by comparison functions can't affect the slice of memory we're * sorting (allowing mutations during sorting is a core-dump * factory, since ob_item may change). 
*/ - saved_ob_size = Py_SIZE(self); - saved_ob_item = self->ob_item; - saved_allocated = self->allocated; - Py_SET_SIZE(self, 0); - FT_ATOMIC_STORE_PTR_RELEASE(self->ob_item, NULL); - self->allocated = -1; /* any operation will reset it to >= 0 */ + DISABLE_LIST(self, saved_ob_size, saved_ob_item, saved_allocated); - if (keyfunc == NULL) { + if (keyfunc == NULL && keylist == NULL) { keys = NULL; lo.keys = saved_ob_item; lo.values = NULL; } - else { + else if (keyfunc != NULL) { if (saved_ob_size < MERGESTATE_TEMP_SIZE/2) /* Leverage stack space we allocated but won't otherwise use */ keys = &ms.temparray[saved_ob_size+1]; @@ -2958,7 +3018,27 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) lo.keys = keys; lo.values = saved_ob_item; } + else { + assert(keylist != NULL); + if (!PyList_Check(keylist)) { + PyErr_Format(PyExc_TypeError, + "'%.200s' object is not a list", + Py_TYPE(keylist)->tp_name); + goto keyfunc_fail; + } + keylist_lob = ((PyListObject *) keylist); + DISABLE_LIST(keylist_lob, keylist_ob_size, keylist_ob_item, keylist_allocated); + if (saved_ob_size != keylist_ob_size) { + PyErr_SetString(PyExc_ValueError, + "Lengths of input list and keylist differ."); + goto keylist_fail; + } + + keys = keylist_ob_item; + lo.keys = keys; + lo.values = saved_ob_item; + } /* The pre-sort check: here's where we decide which compare function to use. * How much optimization is safe? 
We test for homogeneity with respect to @@ -3117,7 +3197,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) succeed: result = Py_None; fail: - if (keys != NULL) { + if (keyfunc != NULL) { for (i = 0; i < saved_ob_size; i++) Py_DECREF(keys[i]); if (saved_ob_size >= MERGESTATE_TEMP_SIZE/2) @@ -3137,28 +3217,30 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) merge_freemem(&ms); -keyfunc_fail: - final_ob_item = self->ob_item; - i = Py_SIZE(self); - Py_SET_SIZE(self, saved_ob_size); - FT_ATOMIC_STORE_PTR_RELEASE(self->ob_item, saved_ob_item); - FT_ATOMIC_STORE_SSIZE_RELAXED(self->allocated, saved_allocated); - if (final_ob_item != NULL) { - /* we cannot use list_clear() for this because it does not - guarantee that the list is really empty when it returns */ - while (--i >= 0) { - Py_XDECREF(final_ob_item[i]); +keylist_fail: + if (keylist_ob_size != -1) { + if (keylist_lob->allocated != -1 && result != NULL) { + /* The user mucked with the keylist during the sort, + * and we don't already have another error to report. 
+ */ + PyErr_SetString(PyExc_ValueError, "keylist modified during sort"); + result = NULL; } -#ifdef Py_GIL_DISABLED - ensure_shared_on_resize(self); - bool use_qsbr = _PyObject_GC_IS_SHARED(self); -#else - bool use_qsbr = false; -#endif - free_list_items(final_ob_item, use_qsbr); + + if (reverse && keylist_ob_size > 1) + reverse_slice(keylist_ob_item, keylist_ob_item + keylist_ob_size); + + REENABLE_LIST(keylist_lob, keylist_ob_size, keylist_ob_item, keylist_allocated); + final_ob_item = NULL; } + +keyfunc_fail: + REENABLE_LIST(self, saved_ob_size, saved_ob_item, saved_allocated); return Py_XNewRef(result); } +#undef DISABLE_LIST +#undef _REENABLE_LIST_GIL_PART +#undef REENABLE_LIST #undef IFLT #undef ISLT @@ -3170,7 +3252,7 @@ PyList_Sort(PyObject *v) return -1; } Py_BEGIN_CRITICAL_SECTION(v); - v = list_sort_impl((PyListObject *)v, NULL, 0); + v = list_sort_impl((PyListObject *)v, NULL, NULL, 0); Py_END_CRITICAL_SECTION(); if (v == NULL) return -1; diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index c2d780ac9b9270..cc46cd890c0f0a 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2638,22 +2638,29 @@ sorted as builtin_sorted iterable as seq: object key as keyfunc: object = None + keylist: object = None reverse: object = False Return a new list containing all items from the iterable in ascending order. -A custom key function can be supplied to customize the sort order, and the -reverse flag can be set to request the result in descending order. +A custom key function can be supplied to customize the sort order. +Alternative to key function is supplying a list to keylist argument, +which will determine sort order and will be modified in place. +The reverse flag can be set to request the result in descending order. +Both key and keylist can not be used at the same time. 
[end disabled clinic input]*/ PyDoc_STRVAR(builtin_sorted__doc__, -"sorted($module, iterable, /, *, key=None, reverse=False)\n" +"sorted($module, iterable, /, *, key=None, keylist=None, reverse=False)\n" "--\n" "\n" "Return a new list containing all items from the iterable in ascending order.\n" "\n" -"A custom key function can be supplied to customize the sort order, and the\n" -"reverse flag can be set to request the result in descending order."); +"A custom key function can be supplied to customize the sort order.\n" +"Alternative to key function is supplying a list to keylist argument,\n" +"which will determine sort order and will be modified in place.\n" +"The reverse flag can be set to request the result in descending order.\n" +"Both key and keylist can not be used at the same time."); #define BUILTIN_SORTED_METHODDEF \ {"sorted", _PyCFunction_CAST(builtin_sorted), METH_FASTCALL | METH_KEYWORDS, builtin_sorted__doc__},