Skip to content

Commit be3c131

Browse files
chris-eiblFidget-Spinnerbrandtbucherhugovk
authored
GH-139922: Tail calling for MSVC (VS 2026) (GH-143068)
Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Co-authored-by: Brandt Bucher <brandt@python.org> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
1 parent 665d280 commit be3c131

File tree

12 files changed

+520
-675
lines changed

12 files changed

+520
-675
lines changed

.github/workflows/tail-call.yml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,19 +79,17 @@ jobs:
7979
with:
8080
python-version: '3.11'
8181

82-
- name: Native Windows (debug)
82+
- name: Native Windows MSVC (release)
8383
if: runner.os == 'Windows' && matrix.architecture != 'ARM64'
8484
shell: cmd
8585
run: |
86-
choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}.1.0
87-
set PlatformToolset=clangcl
88-
set LLVMToolsVersion=${{ matrix.llvm }}.1.0
89-
set LLVMInstallDir=C:\Program Files\LLVM
90-
call ./PCbuild/build.bat --tail-call-interp -d -p ${{ matrix.architecture }}
91-
call ./PCbuild/rt.bat -d -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3
86+
rem shell is cmd: set PATH with "set" and use "call" so control returns after each .bat
choco install visualstudio2026buildtools --no-progress -y --force --params "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --locale en-US --passive"
87+
set "PATH=C:\Program Files (x86)\Microsoft Visual Studio\18\BuildTools\MSBuild\Current\bin;%PATH%"
88+
call ./PCbuild/build.bat --tail-call-interp -c Release -p ${{ matrix.architecture }} "/p:PlatformToolset=v145"
89+
call ./PCbuild/rt.bat -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3
9290
9391
# No tests (yet):
94-
- name: Emulated Windows (release)
92+
- name: Emulated Windows Clang (release)
9593
if: runner.os == 'Windows' && matrix.architecture == 'ARM64'
9694
shell: cmd
9795
run: |

Include/internal/pycore_ceval.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,17 @@ _Py_VectorCall_StackRefSteal(
415415
int total_args,
416416
_PyStackRef kwnames);
417417

418+
// Vectorcall `callable` with `arguments`, consuming ("stealing") the
// references held by `callable`, `arguments[0..total_args)` and `kwnames`
// (which may be a NULL stackref). `total_args` includes the keyword
// arguments named by `kwnames`. When `call_instrumentation` is true, a
// C_RETURN or C_RAISE monitoring event is reported for `this_instr` in
// `frame`. Returns the call result, or NULL on failure.
PyAPI_FUNC(PyObject*)
419+
_Py_VectorCallInstrumentation_StackRefSteal(
420+
_PyStackRef callable,
421+
_PyStackRef* arguments,
422+
int total_args,
423+
_PyStackRef kwnames,
424+
bool call_instrumentation,
425+
_PyInterpreterFrame* frame,
426+
_Py_CODEUNIT* this_instr,
427+
PyThreadState* tstate);
428+
418429
PyAPI_FUNC(PyObject *)
419430
_Py_BuiltinCallFast_StackRefSteal(
420431
_PyStackRef callable,
@@ -464,6 +475,11 @@ _Py_assert_within_stack_bounds(
464475
_PyInterpreterFrame *frame, _PyStackRef *stack_pointer,
465476
const char *filename, int lineno);
466477

478+
// Like PyMapping_GetOptionalItem, but returns the PyObject* instead of taking
479+
// it as an out parameter. This helps MSVC's escape analysis when used with
480+
// tail calling.
481+
PyAPI_FUNC(PyObject*) _PyMapping_GetOptionalItem2(PyObject* obj, PyObject* key, int* err);
482+
467483
#ifdef __cplusplus
468484
}
469485
#endif
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Allow building CPython with the tail-calling interpreter using MSVC from Visual Studio 2026. This provides a performance gain over the prior interpreter for MSVC. Patch by Ken Jin, Brandt Bucher, and Chris Eibl, with help from the MSVC team, including Hulon Jenkins.

Objects/abstract.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,14 @@ PyMapping_GetOptionalItem(PyObject *obj, PyObject *key, PyObject **result)
224224
return 0;
225225
}
226226

227+
// Variant of PyMapping_GetOptionalItem() that returns the looked-up object
// directly and reports the status code through `*err`, instead of handing
// the object back through an out parameter.
//
// obj: mapping to query.
// key: key to look up.
// err: receives the return value of PyMapping_GetOptionalItem(); callers
//      treat a negative value as an error.
// Returns whatever PyMapping_GetOptionalItem() stored in its result slot;
// callers treat NULL with a non-negative `*err` as "key not found".
PyObject*
228+
_PyMapping_GetOptionalItem2(PyObject *obj, PyObject *key, int *err)
229+
{
230+
// NOTE(review): `result` is not initialized here; this relies on
// PyMapping_GetOptionalItem() always storing through its out parameter,
// including on error -- confirm.
PyObject* result;
231+
*err = PyMapping_GetOptionalItem(obj, key, &result);
232+
return result;
233+
}
234+
227235
int
228236
PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value)
229237
{

PCbuild/pythoncore.vcxproj

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,9 @@
600600
<ClCompile Include="..\Python\bltinmodule.c" />
601601
<ClCompile Include="..\Python\bootstrap_hash.c" />
602602
<ClCompile Include="..\Python\brc.c" />
603-
<ClCompile Include="..\Python\ceval.c" />
603+
<ClCompile Include="..\Python\ceval.c">
604+
<!-- MSVC-only (not ClangCL) tail-call builds compile ceval.c with /std:clatest; presumably needed for the attribute syntax used for tail calls (TODO confirm). Both properties are quoted so the condition stays valid if either is empty. -->
<AdditionalOptions Condition="'$(UseTailCallInterp)' == 'true' and '$(PlatformToolset)' != 'ClangCL'">/std:clatest %(AdditionalOptions)</AdditionalOptions>
605+
</ClCompile>
604606
<ClCompile Include="..\Python\codecs.c" />
605607
<ClCompile Include="..\Python\codegen.c" />
606608
<ClCompile Include="..\Python\compile.c" />

Python/bytecodes.c

Lines changed: 57 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1507,8 +1507,8 @@ dummy_func(
15071507
}
15081508

15091509
inst(LOAD_BUILD_CLASS, ( -- bc)) {
1510-
PyObject *bc_o;
1511-
int err = PyMapping_GetOptionalItem(BUILTINS(), &_Py_ID(__build_class__), &bc_o);
1510+
int err;
1511+
PyObject *bc_o = _PyMapping_GetOptionalItem2(BUILTINS(), &_Py_ID(__build_class__), &err);
15121512
ERROR_IF(err < 0);
15131513
if (bc_o == NULL) {
15141514
_PyErr_SetString(tstate, PyExc_NameError,
@@ -1711,8 +1711,9 @@ dummy_func(
17111711

17121712
inst(LOAD_FROM_DICT_OR_GLOBALS, (mod_or_class_dict -- v)) {
17131713
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
1714-
PyObject *v_o;
1715-
int err = PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o);
1714+
int err;
1715+
PyObject *v_o = _PyMapping_GetOptionalItem2(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &err);
1716+
17161717
PyStackRef_CLOSE(mod_or_class_dict);
17171718
ERROR_IF(err < 0);
17181719
if (v_o == NULL) {
@@ -1735,11 +1736,11 @@ dummy_func(
17351736
else {
17361737
/* Slow-path if globals or builtins is not a dict */
17371738
/* namespace 1: globals */
1738-
int err = PyMapping_GetOptionalItem(GLOBALS(), name, &v_o);
1739+
v_o = _PyMapping_GetOptionalItem2(GLOBALS(), name, &err);
17391740
ERROR_IF(err < 0);
17401741
if (v_o == NULL) {
17411742
/* namespace 2: builtins */
1742-
int err = PyMapping_GetOptionalItem(BUILTINS(), name, &v_o);
1743+
v_o = _PyMapping_GetOptionalItem2(BUILTINS(), name, &err);
17431744
ERROR_IF(err < 0);
17441745
if (v_o == NULL) {
17451746
_PyEval_FormatExcCheckArg(
@@ -1898,14 +1899,14 @@ dummy_func(
18981899
}
18991900

19001901
inst(LOAD_FROM_DICT_OR_DEREF, (class_dict_st -- value)) {
1901-
PyObject *value_o;
19021902
PyObject *name;
19031903
PyObject *class_dict = PyStackRef_AsPyObjectBorrow(class_dict_st);
19041904

19051905
assert(class_dict);
19061906
assert(oparg >= 0 && oparg < _PyFrame_GetCode(frame)->co_nlocalsplus);
19071907
name = PyTuple_GET_ITEM(_PyFrame_GetCode(frame)->co_localsplusnames, oparg);
1908-
int err = PyMapping_GetOptionalItem(class_dict, name, &value_o);
1908+
int err;
1909+
PyObject* value_o = _PyMapping_GetOptionalItem2(class_dict, name, &err);
19091910
if (err < 0) {
19101911
ERROR_NO_POP();
19111912
}
@@ -2074,14 +2075,14 @@ dummy_func(
20742075
}
20752076

20762077
inst(SETUP_ANNOTATIONS, (--)) {
2077-
PyObject *ann_dict;
20782078
if (LOCALS() == NULL) {
20792079
_PyErr_Format(tstate, PyExc_SystemError,
20802080
"no locals found when setting up annotations");
20812081
ERROR_IF(true);
20822082
}
20832083
/* check if __annotations__ in locals()... */
2084-
int err = PyMapping_GetOptionalItem(LOCALS(), &_Py_ID(__annotations__), &ann_dict);
2084+
int err;
2085+
PyObject* ann_dict = _PyMapping_GetOptionalItem2(LOCALS(), &_Py_ID(__annotations__), &err);
20852086
ERROR_IF(err < 0);
20862087
if (ann_dict == NULL) {
20872088
ann_dict = PyDict_New();
@@ -2185,8 +2186,12 @@ dummy_func(
21852186
}
21862187
// we make no attempt to optimize here; specializations should
21872188
// handle any case whose performance we care about
2188-
PyObject *stack[] = {class, self};
2189-
PyObject *super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
2189+
PyObject *super;
2190+
{
2191+
// scope to tell MSVC that stack is not escaping
2192+
PyObject *stack[] = {class, self};
2193+
super = PyObject_Vectorcall(global_super, stack, oparg & 2, NULL);
2194+
}
21902195
if (opcode == INSTRUMENTED_LOAD_SUPER_ATTR) {
21912196
PyObject *arg = oparg & 2 ? class : &_PyInstrumentation_MISSING;
21922197
if (super == NULL) {
@@ -2245,8 +2250,13 @@ dummy_func(
22452250
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 2);
22462251
PyTypeObject *cls = (PyTypeObject *)class;
22472252
int method_found = 0;
2248-
PyObject *attr_o = _PySuper_Lookup(cls, self, name,
2249-
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? &method_found : NULL);
2253+
PyObject *attr_o;
2254+
{
2255+
// scope to tell MSVC that method_found_ptr is not escaping
2256+
int *method_found_ptr = &method_found;
2257+
attr_o = _PySuper_Lookup(cls, self, name,
2258+
Py_TYPE(self)->tp_getattro == PyObject_GenericGetAttr ? method_found_ptr : NULL);
2259+
}
22502260
if (attr_o == NULL) {
22512261
ERROR_NO_POP();
22522262
}
@@ -3472,10 +3482,14 @@ dummy_func(
34723482
}
34733483
assert(PyStackRef_IsTaggedInt(lasti));
34743484
(void)lasti; // Shut up compiler warning if asserts are off
3475-
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb};
3476-
int has_self = !PyStackRef_IsNull(exit_self);
3477-
PyObject *res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self,
3478-
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
3485+
PyObject* res_o;
3486+
{
3487+
// scope to tell MSVC that stack is not escaping
3488+
PyObject *stack[5] = {NULL, PyStackRef_AsPyObjectBorrow(exit_self), exc, val_o, tb};
3489+
int has_self = !PyStackRef_IsNull(exit_self);
3490+
res_o = PyObject_Vectorcall(exit_func_o, stack + 2 - has_self,
3491+
(3 + has_self) | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
3492+
}
34793493
Py_XDECREF(original_tb);
34803494
ERROR_IF(res_o == NULL);
34813495
res = PyStackRef_FromPyObjectSteal(res_o);
@@ -3707,36 +3721,18 @@ dummy_func(
37073721
frame->return_offset = INSTRUCTION_SIZE;
37083722
DISPATCH_INLINED(new_frame);
37093723
}
3710-
/* Callable is not a normal Python function */
3711-
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
3712-
if (CONVERSION_FAILED(args_o)) {
3713-
DECREF_INPUTS();
3714-
ERROR_IF(true);
3715-
}
3716-
PyObject *res_o = PyObject_Vectorcall(
3717-
callable_o, args_o,
3718-
total_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
3719-
NULL);
3720-
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
3721-
if (opcode == INSTRUMENTED_CALL) {
3722-
PyObject *arg = total_args == 0 ?
3723-
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
3724-
if (res_o == NULL) {
3725-
_Py_call_instrumentation_exc2(
3726-
tstate, PY_MONITORING_EVENT_C_RAISE,
3727-
frame, this_instr, callable_o, arg);
3728-
}
3729-
else {
3730-
int err = _Py_call_instrumentation_2args(
3731-
tstate, PY_MONITORING_EVENT_C_RETURN,
3732-
frame, this_instr, callable_o, arg);
3733-
if (err < 0) {
3734-
Py_CLEAR(res_o);
3735-
}
3736-
}
3737-
}
3738-
assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL));
3739-
DECREF_INPUTS();
3724+
PyObject* res_o = _Py_VectorCallInstrumentation_StackRefSteal(
3725+
callable,
3726+
arguments,
3727+
total_args,
3728+
PyStackRef_NULL,
3729+
opcode == INSTRUMENTED_CALL,
3730+
frame,
3731+
this_instr,
3732+
tstate);
3733+
DEAD(args);
3734+
DEAD(self_or_null);
3735+
DEAD(callable);
37403736
ERROR_IF(res_o == NULL);
37413737
res = PyStackRef_FromPyObjectSteal(res_o);
37423738
}
@@ -4587,35 +4583,19 @@ dummy_func(
45874583
frame->return_offset = INSTRUCTION_SIZE;
45884584
DISPATCH_INLINED(new_frame);
45894585
}
4590-
/* Callable is not a normal Python function */
4591-
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
4592-
if (CONVERSION_FAILED(args_o)) {
4593-
DECREF_INPUTS();
4594-
ERROR_IF(true);
4595-
}
4596-
PyObject *res_o = PyObject_Vectorcall(
4597-
callable_o, args_o,
4598-
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
4599-
kwnames_o);
4600-
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
4601-
if (opcode == INSTRUMENTED_CALL_KW) {
4602-
PyObject *arg = total_args == 0 ?
4603-
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
4604-
if (res_o == NULL) {
4605-
_Py_call_instrumentation_exc2(
4606-
tstate, PY_MONITORING_EVENT_C_RAISE,
4607-
frame, this_instr, callable_o, arg);
4608-
}
4609-
else {
4610-
int err = _Py_call_instrumentation_2args(
4611-
tstate, PY_MONITORING_EVENT_C_RETURN,
4612-
frame, this_instr, callable_o, arg);
4613-
if (err < 0) {
4614-
Py_CLEAR(res_o);
4615-
}
4616-
}
4617-
}
4618-
DECREF_INPUTS();
4586+
PyObject* res_o = _Py_VectorCallInstrumentation_StackRefSteal(
4587+
callable,
4588+
arguments,
4589+
total_args,
4590+
kwnames,
4591+
opcode == INSTRUMENTED_CALL_KW,
4592+
frame,
4593+
this_instr,
4594+
tstate);
4595+
DEAD(kwnames);
4596+
DEAD(args);
4597+
DEAD(self_or_null);
4598+
DEAD(callable);
46194599
ERROR_IF(res_o == NULL);
46204600
res = PyStackRef_FromPyObjectSteal(res_o);
46214601
}

Python/ceval.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,65 @@ _Py_VectorCall_StackRefSteal(
10711071
return res;
10721072
}
10731073

1074+
// Call `callable` through PyObject_Vectorcall() with the stack-resident
// `arguments`, optionally reporting the outcome to the monitoring machinery,
// and release every input reference before returning.
//
// callable:   object to call; its reference is closed before returning.
// arguments:  `total_args` stack references; each is set to
//             PyStackRef_NULL and closed before returning.
// total_args: number of entries in `arguments`, including the keyword
//             arguments named by `kwnames`.
// kwnames:    tuple of keyword names, or a NULL stackref; closed with
//             PyStackRef_XCLOSE before returning.
// call_instrumentation: when true, report a C_RETURN event on success or a
//             C_RAISE event on failure for `this_instr` in `frame`.
// Returns the call result, or NULL if argument conversion, the call itself,
// or the C_RETURN instrumentation callback failed.
PyObject*
1075+
_Py_VectorCallInstrumentation_StackRefSteal(
1076+
_PyStackRef callable,
1077+
_PyStackRef* arguments,
1078+
int total_args,
1079+
_PyStackRef kwnames,
1080+
bool call_instrumentation,
1081+
_PyInterpreterFrame* frame,
1082+
_Py_CODEUNIT* this_instr,
1083+
PyThreadState* tstate)
1084+
{
1085+
PyObject* res;
1086+
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
1087+
if (CONVERSION_FAILED(args_o)) {
1088+
// Skip the call, but still fall through to the shared cleanup so that
// every input reference is released.
res = NULL;
1089+
goto cleanup;
1090+
}
1091+
PyObject* callable_o = PyStackRef_AsPyObjectBorrow(callable);
1092+
PyObject* kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames);
1093+
// The keyword arguments named by kwnames are part of total_args, so they
// are subtracted to obtain the positional count passed to the call.
int positional_args = total_args;
1094+
if (kwnames_o != NULL) {
1095+
positional_args -= (int)PyTuple_GET_SIZE(kwnames_o);
1096+
}
1097+
res = PyObject_Vectorcall(
1098+
callable_o, args_o,
1099+
positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET,
1100+
kwnames_o);
1101+
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
1102+
if (call_instrumentation) {
1103+
// The monitoring callbacks receive the first argument, or the MISSING
// sentinel when the call had no arguments.
PyObject* arg = total_args == 0 ?
1104+
&_PyInstrumentation_MISSING : PyStackRef_AsPyObjectBorrow(arguments[0]);
1105+
if (res == NULL) {
1106+
_Py_call_instrumentation_exc2(
1107+
tstate, PY_MONITORING_EVENT_C_RAISE,
1108+
frame, this_instr, callable_o, arg);
1109+
}
1110+
else {
1111+
int err = _Py_call_instrumentation_2args(
1112+
tstate, PY_MONITORING_EVENT_C_RETURN,
1113+
frame, this_instr, callable_o, arg);
1114+
if (err < 0) {
1115+
// The C_RETURN callback failed: drop the result so the caller
// sees NULL.
Py_CLEAR(res);
1116+
}
1117+
}
1118+
}
1119+
// Exactly one of "have a result" / "have a pending exception" must hold.
// NOTE(review): this uses PyErr_Occurred() although `tstate` is available
// (the inline code this helper replaces used _PyErr_Occurred(tstate)).
assert((res != NULL) ^ (PyErr_Occurred() != NULL));
1120+
// Reached on both the success and the failure paths.
cleanup:
1121+
PyStackRef_XCLOSE(kwnames);
1122+
// arguments is a pointer into the GC visible stack,
1123+
// so we must NULL out values as we clear them.
1124+
for (int i = total_args - 1; i >= 0; i--) {
1125+
_PyStackRef tmp = arguments[i];
1126+
arguments[i] = PyStackRef_NULL;
1127+
PyStackRef_CLOSE(tmp);
1128+
}
1129+
PyStackRef_CLOSE(callable);
1130+
return res;
1131+
}
1132+
10741133
PyObject *
10751134
_Py_BuiltinCallFast_StackRefSteal(
10761135
_PyStackRef callable,

Python/ceval_macros.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,19 @@
8787
# elif defined(_MSC_VER) && (_MSC_VER < 1950)
8888
# error "You need at least VS 2026 / PlatformToolset v145 for tail calling."
8989
# endif
90-
91-
// Note: [[clang::musttail]] works for GCC 15, but not __attribute__((musttail)) at the moment.
92-
# define Py_MUSTTAIL [[clang::musttail]]
93-
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
94-
Py_PRESERVE_NONE_CC typedef PyObject* (*py_tail_call_funcptr)(TAIL_CALL_PARAMS);
90+
# if defined(_MSC_VER) && !defined(__clang__)
91+
# define Py_MUSTTAIL [[msvc::musttail]]
92+
# define Py_PRESERVE_NONE_CC __preserve_none
93+
# else
94+
# define Py_MUSTTAIL __attribute__((musttail))
95+
# define Py_PRESERVE_NONE_CC __attribute__((preserve_none))
96+
# endif
97+
typedef PyObject *(Py_PRESERVE_NONE_CC *py_tail_call_funcptr)(TAIL_CALL_PARAMS);
9598

9699
# define DISPATCH_TABLE_VAR instruction_funcptr_table
97100
# define DISPATCH_TABLE instruction_funcptr_handler_table
98101
# define TRACING_DISPATCH_TABLE instruction_funcptr_tracing_table
99-
# define TARGET(op) Py_PRESERVE_NONE_CC PyObject *_TAIL_CALL_##op(TAIL_CALL_PARAMS)
102+
# define TARGET(op) Py_NO_INLINE PyObject *Py_PRESERVE_NONE_CC _TAIL_CALL_##op(TAIL_CALL_PARAMS)
100103

101104
# define DISPATCH_GOTO() \
102105
do { \

0 commit comments

Comments
 (0)