@@ -367,19 +367,6 @@ partial_descr_get(PyObject *self, PyObject *obj, PyObject *type)
367367 return PyMethod_New (self , obj );
368368}
369369
370- /* Merging keyword arguments using the vectorcall convention is messy, so
371- * if we would need to do that, we stop using vectorcall and fall back
372- * to using partial_call() instead. */
373- Py_NO_INLINE static PyObject *
374- partial_vectorcall_fallback (PyThreadState * tstate , partialobject * pto ,
375- PyObject * const * args , size_t nargsf ,
376- PyObject * kwnames )
377- {
378- pto -> vectorcall = NULL ;
379- Py_ssize_t nargs = PyVectorcall_NARGS (nargsf );
380- return _PyObject_MakeTpCall (tstate , (PyObject * )pto , args , nargs , kwnames );
381- }
382-
383370static PyObject *
384371partial_vectorcall (PyObject * self , PyObject * const * args ,
385372 size_t nargsf , PyObject * kwnames )
@@ -388,10 +375,7 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
388375 PyThreadState * tstate = _PyThreadState_GET ();
389376 Py_ssize_t nargs = PyVectorcall_NARGS (nargsf );
390377
391- /* pto->kw is mutable, so need to check every time */
392- if (PyDict_GET_SIZE (pto -> kw )) {
393- return partial_vectorcall_fallback (tstate , pto , args , nargsf , kwnames );
394- }
378+ /* Placeholder check */
395379 Py_ssize_t pto_phcount = pto -> phcount ;
396380 if (nargs < pto_phcount ) {
397381 PyErr_Format (PyExc_TypeError ,
@@ -400,50 +384,143 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
400384 return NULL ;
401385 }
402386
403- Py_ssize_t nargskw = nargs ;
404- if (kwnames != NULL ) {
405- nargskw += PyTuple_GET_SIZE (kwnames );
406- }
407-
408387 PyObject * * pto_args = _PyTuple_ITEMS (pto -> args );
409388 Py_ssize_t pto_nargs = PyTuple_GET_SIZE (pto -> args );
389+ Py_ssize_t pto_nkwds = PyDict_GET_SIZE (pto -> kw );
390+ Py_ssize_t nkwds = kwnames == NULL ? 0 : PyTuple_GET_SIZE (kwnames );
391+ Py_ssize_t nargskw = nargs + nkwds ;
392+
393+ /* Special cases */
394+ if (!pto_nkwds ) {
395+ /* Fast path if we're called without arguments */
396+ if (nargskw == 0 ) {
397+ return _PyObject_VectorcallTstate (tstate , pto -> fn , pto_args ,
398+ pto_nargs , NULL );
399+ }
410400
411- /* Fast path if we're called without arguments */
412- if (nargskw == 0 ) {
413- return _PyObject_VectorcallTstate (tstate , pto -> fn ,
414- pto_args , pto_nargs , NULL );
401+ /* Use PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
402+ * positional argument. */
403+ if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET )) {
404+ PyObject * * newargs = (PyObject * * )args - 1 ;
405+ PyObject * tmp = newargs [0 ];
406+ newargs [0 ] = pto_args [0 ];
407+ PyObject * ret = _PyObject_VectorcallTstate (tstate , pto -> fn , newargs ,
408+ nargs + 1 , kwnames );
409+ newargs [0 ] = tmp ;
410+ return ret ;
411+ }
415412 }
416413
417- /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
418- * positional argument */
419- if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET )) {
420- PyObject * * newargs = (PyObject * * )args - 1 ;
421- PyObject * tmp = newargs [0 ];
422- newargs [0 ] = pto_args [0 ];
423- PyObject * ret = _PyObject_VectorcallTstate (tstate , pto -> fn ,
424- newargs , nargs + 1 , kwnames );
425- newargs [0 ] = tmp ;
426- return ret ;
427- }
414+ /* Total sizes */
415+ Py_ssize_t tot_nargs = pto_nargs + nargs - pto_phcount ;
416+ Py_ssize_t tot_nkwds = pto_nkwds + nkwds ;
417+ Py_ssize_t tot_nargskw = tot_nargs + tot_nkwds ;
428418
429- PyObject * small_stack [ _PY_FASTCALL_SMALL_STACK ];
430- PyObject * * stack ;
419+ PyObject * pto_kw_merged = NULL ; // pto_kw with duplicates merged (if any)
420+ PyObject * tot_kwnames ;
431421
432- Py_ssize_t tot_nargskw = pto_nargs + nargskw - pto_phcount ;
433- if (tot_nargskw <= (Py_ssize_t )Py_ARRAY_LENGTH (small_stack )) {
422+ /* Allocate Stack
423+ * Note, _PY_FASTCALL_SMALL_STACK is optimal for positional only
424+ * This case might have keyword arguments
425+ * furthermore, it might use extra stack space for temporary key storage
426+ * thus, double small_stack size is used, which is 10 * 8 = 80 bytes */
427+ PyObject * small_stack [_PY_FASTCALL_SMALL_STACK * 2 ];
428+ PyObject * * tmp_stack , * * stack ;
429+ Py_ssize_t init_stack_size = tot_nargskw ;
430+ if (pto_nkwds ) {
431+ // If pto_nkwds, allocate additional space for temporary new keys
432+ init_stack_size += nkwds ;
433+ }
434+ if (init_stack_size <= (Py_ssize_t )Py_ARRAY_LENGTH (small_stack )) {
434435 stack = small_stack ;
435436 }
436437 else {
437- stack = PyMem_Malloc (tot_nargskw * sizeof (PyObject * ));
438+ stack = PyMem_Malloc (init_stack_size * sizeof (PyObject * ));
438439 if (stack == NULL ) {
439- PyErr_NoMemory ();
440- return NULL ;
440+ return PyErr_NoMemory ();
441441 }
442442 }
443443
444- Py_ssize_t tot_nargs ;
444+ /* Copy keywords to stack */
445+ if (!pto_nkwds ) {
446+ tot_kwnames = kwnames ;
447+ if (nkwds ) {
448+ /* if !pto_nkwds & nkwds, then simply append kw */
449+ memcpy (stack + tot_nargs , args + nargs , nkwds * sizeof (PyObject * ));
450+ }
451+ }
452+ else {
453+ /* stack is now [<positionals>, <pto_kwds>, <kwds>, <kwds_keys>]
454+ * Will resize later to [<positionals>, <merged_kwds>] */
455+ PyObject * key , * val ;
456+
457+ /* Merge kw to pto_kw or add to tail (if not duplicate) */
458+ Py_ssize_t n_tail = 0 ;
459+ for (Py_ssize_t i = 0 ; i < nkwds ; ++ i ) {
460+ key = PyTuple_GET_ITEM (kwnames , i );
461+ val = args [nargs + i ];
462+ if (PyDict_Contains (pto -> kw , key )) {
463+ if (pto_kw_merged == NULL ) {
464+ pto_kw_merged = PyDict_Copy (pto -> kw );
465+ if (pto_kw_merged == NULL ) {
466+ goto error ;
467+ }
468+ }
469+ if (PyDict_SetItem (pto_kw_merged , key , val ) < 0 ) {
470+ Py_DECREF (pto_kw_merged );
471+ goto error ;
472+ }
473+ }
474+ else {
475+ /* Copy keyword tail to stack */
476+ stack [tot_nargs + pto_nkwds + n_tail ] = val ;
477+ stack [tot_nargskw + n_tail ] = key ;
478+ n_tail ++ ;
479+ }
480+ }
481+ Py_ssize_t n_merges = nkwds - n_tail ;
482+
483+ /* Create total kwnames */
484+ tot_kwnames = PyTuple_New (tot_nkwds - n_merges );
485+ if (tot_kwnames == NULL ) {
486+ Py_XDECREF (pto_kw_merged );
487+ goto error ;
488+ }
489+ for (Py_ssize_t i = 0 ; i < n_tail ; ++ i ) {
490+ key = Py_NewRef (stack [tot_nargskw + i ]);
491+ PyTuple_SET_ITEM (tot_kwnames , pto_nkwds + i , key );
492+ }
493+
494+ /* Copy pto_keywords with overlapping call keywords merged
495+ * Note, tail is already copied. */
496+ Py_ssize_t pos = 0 , i = 0 ;
497+ while (PyDict_Next (n_merges ? pto_kw_merged : pto -> kw , & pos , & key , & val )) {
498+ assert (i < pto_nkwds );
499+ PyTuple_SET_ITEM (tot_kwnames , i , Py_NewRef (key ));
500+ stack [tot_nargs + i ] = val ;
501+ i ++ ;
502+ }
503+ assert (i == pto_nkwds );
504+ Py_XDECREF (pto_kw_merged );
505+
506+ /* Resize Stack if removing the overallocation saves some noticeable memory
507+ * NOTE: This whole block can be removed without breaking anything */
508+ Py_ssize_t noveralloc = n_merges + nkwds ;
509+ if (stack != small_stack && noveralloc > 6 && noveralloc > init_stack_size / 10 ) {
510+ tmp_stack = PyMem_Realloc (stack , (tot_nargskw - n_merges ) * sizeof (PyObject * ));
511+ if (tmp_stack == NULL ) {
512+ Py_DECREF (tot_kwnames );
513+ if (stack != small_stack ) {
514+ PyMem_Free (stack );
515+ }
516+ return PyErr_NoMemory ();
517+ }
518+ stack = tmp_stack ;
519+ }
520+ }
521+
522+ /* Copy Positionals to stack */
445523 if (pto_phcount ) {
446- tot_nargs = pto_nargs + nargs - pto_phcount ;
447524 Py_ssize_t j = 0 ; // New args index
448525 for (Py_ssize_t i = 0 ; i < pto_nargs ; i ++ ) {
449526 if (pto_args [i ] == pto -> placeholder ) {
@@ -455,22 +532,31 @@ partial_vectorcall(PyObject *self, PyObject *const *args,
455532 }
456533 }
457534 assert (j == pto_phcount );
458- if (nargskw > pto_phcount ) {
459- memcpy (stack + pto_nargs , args + j , (nargskw - j ) * sizeof (PyObject * ));
535+ /* Add remaining args from new_args */
536+ if (nargs > pto_phcount ) {
537+ memcpy (stack + pto_nargs , args + j , (nargs - j ) * sizeof (PyObject * ));
460538 }
461539 }
462540 else {
463- tot_nargs = pto_nargs + nargs ;
464- /* Copy to new stack, using borrowed references */
465541 memcpy (stack , pto_args , pto_nargs * sizeof (PyObject * ));
466- memcpy (stack + pto_nargs , args , nargskw * sizeof (PyObject * ));
542+ memcpy (stack + pto_nargs , args , nargs * sizeof (PyObject * ));
467543 }
468- PyObject * ret = _PyObject_VectorcallTstate (tstate , pto -> fn ,
469- stack , tot_nargs , kwnames );
544+
545+ PyObject * ret = _PyObject_VectorcallTstate (tstate , pto -> fn , stack ,
546+ tot_nargs , tot_kwnames );
470547 if (stack != small_stack ) {
471548 PyMem_Free (stack );
472549 }
550+ if (pto_nkwds ) {
551+ Py_DECREF (tot_kwnames );
552+ }
473553 return ret ;
554+
555+ error :
556+ if (stack != small_stack ) {
557+ PyMem_Free (stack );
558+ }
559+ return NULL ;
474560}
475561
476562/* Set pto->vectorcall depending on the parameters of the partial object */
0 commit comments