Skip to content

Commit ecc71c4

Browse files
Merge branch 'main' into encodings-cache
2 parents ef28517 + 99e2c5e commit ecc71c4

File tree

105 files changed

+3715
-2925
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

105 files changed

+3715
-2925
lines changed

Doc/library/asyncio-task.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ Running Tasks Concurrently
557557
provides stronger safety guarantees than *gather* for scheduling a nesting of subtasks:
558558
if a task (or a subtask, a task scheduled by a task)
559559
raises an exception, *TaskGroup* will, while *gather* will not,
560-
cancel the remaining scheduled tasks).
560+
cancel the remaining scheduled tasks.
561561

562562
.. _asyncio_example_gather:
563563

Doc/tutorial/datastructures.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,9 @@ Curly braces or the :func:`set` function can be used to create sets. Note: to
493493
create an empty set you have to use ``set()``, not ``{}``; the latter creates an
494494
empty dictionary, a data structure that we discuss in the next section.
495495

496+
Because sets are unordered, iterating over them or printing them can
497+
produce the elements in a different order than you expect.
498+
496499
Here is a brief demonstration::
497500

498501
>>> basket = {'apple', 'orange', 'apple', 'pear', 'orange', 'banana'}

Doc/whatsnew/3.15.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,11 +1286,11 @@ Upgraded JIT compiler
12861286

12871287
Results from the `pyperformance <https://github.com/python/pyperformance>`__
12881288
benchmark suite report
1289-
`4-5% <https://raw.githubusercontent.com/facebookexperimental/free-threading-benchmarking/refs/heads/main/results/bm-20260110-3.15.0a3%2B-aa8578d-JIT/bm-20260110-vultr-x86_64-python-aa8578dc54df2af9daa3-3.15.0a3%2B-aa8578d-vs-base.svg>`__
1289+
`5-6% <https://doesjitgobrrr.com/run/2026-03-11>`__
12901290
geometric mean performance improvement for the JIT over the standard CPython
12911291
interpreter built with all optimizations enabled on x86-64 Linux. On AArch64
12921292
macOS, the JIT has a
1293-
`7-8% <https://raw.githubusercontent.com/facebookexperimental/free-threading-benchmarking/refs/heads/main/results/bm-20260110-3.15.0a3%2B-aa8578d-JIT/bm-20260110-macm4pro-arm64-python-aa8578dc54df2af9daa3-3.15.0a3%2B-aa8578d-vs-base.svg>`__
1293+
`8-9% <https://doesjitgobrrr.com/run/2026-03-11>`__
12941294
speedup over the :ref:`tail calling interpreter <whatsnew314-tail-call-interpreter>`
12951295
with all optimizations enabled. The speedups for JIT
12961296
builds versus no JIT builds range from roughly 15% slowdown to over

Include/internal/pycore_backoff.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,20 @@ initial_jump_backoff_counter(_PyOptimizationConfig *opt_config)
135135
opt_config->jump_backward_initial_backoff);
136136
}
137137

138+
// This needs to be around 2-4x JUMP_BACKWARD_INITIAL_VALUE.
139+
// The reasoning is that we always want loop traces to form first and inline
140+
// the functions they call, before those functions warm up on their own and
141+
// the loop traces end up linking to them instead of inlining them.
142+
#define RESUME_INITIAL_VALUE 8190
143+
#define RESUME_INITIAL_BACKOFF 6
144+
static inline _Py_BackoffCounter
145+
initial_resume_backoff_counter(_PyOptimizationConfig *opt_config)
146+
{
147+
return make_backoff_counter(
148+
opt_config->resume_initial_value,
149+
opt_config->resume_initial_backoff);
150+
}
151+
138152
/* Initial exit temperature.
139153
* Must be larger than ADAPTIVE_COOLDOWN_VALUE,
140154
* otherwise when a side exit warms up we may construct

Include/internal/pycore_code.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ PyAPI_FUNC(void) _Py_Specialize_ToBool(_PyStackRef value, _Py_CODEUNIT *instr);
323323
PyAPI_FUNC(void) _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr);
324324
PyAPI_FUNC(void) _Py_GatherStats_GetIter(_PyStackRef iterable);
325325
PyAPI_FUNC(void) _Py_Specialize_CallFunctionEx(_PyStackRef func_st, _Py_CODEUNIT *instr);
326+
PyAPI_FUNC(void) _Py_Specialize_Resume(_Py_CODEUNIT *instr, PyThreadState *tstate, _PyInterpreterFrame *frame);
326327

327328
// Utility functions for reading/writing 32/64-bit values in the inline caches.
328329
// Great care should be taken to ensure that these functions remain correct and

Include/internal/pycore_interp_structs.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ extern "C" {
1414
#include "pycore_structs.h" // PyHamtObject
1515
#include "pycore_tstate.h" // _PyThreadStateImpl
1616
#include "pycore_typedefs.h" // _PyRuntimeState
17+
#include "pycore_uop.h" // _PyBloomFilter
1718

1819
#define CODE_MAX_WATCHERS 8
1920
#define CONTEXT_MAX_WATCHERS 8
@@ -413,6 +414,9 @@ typedef struct _PyOptimizationConfig {
413414
uint16_t jump_backward_initial_value;
414415
uint16_t jump_backward_initial_backoff;
415416

417+
uint16_t resume_initial_value;
418+
uint16_t resume_initial_backoff;
419+
416420
// JIT optimization thresholds
417421
uint16_t side_exit_initial_value;
418422
uint16_t side_exit_initial_backoff;
@@ -972,7 +976,10 @@ struct _is {
972976

973977
// Optimization configuration (thresholds and flags for JIT and interpreter)
974978
_PyOptimizationConfig opt_config;
975-
struct _PyExecutorObject *executor_list_head;
979+
_PyBloomFilter *executor_blooms; // Contiguous bloom filter array
980+
struct _PyExecutorObject **executor_ptrs; // Corresponding executor pointer array
981+
size_t executor_count; // Number of valid executors
982+
size_t executor_capacity; // Array capacity
976983
struct _PyExecutorObject *executor_deletion_list_head;
977984
struct _PyExecutorObject *cold_executor;
978985
struct _PyExecutorObject *cold_dynamic_executor;

Include/internal/pycore_magic_number.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,7 @@ Known values:
292292
Python 3.15a4 3659 (Add CALL_FUNCTION_EX specialization)
293293
Python 3.15a4 3660 (Change generator preamble code)
294294
Python 3.15a4 3661 (Lazy imports IMPORT_NAME opcode changes)
295+
Python 3.15a6 3662 (Add counter to RESUME)
295296
296297
297298
Python 3.16 will start with 3700
@@ -305,7 +306,7 @@ PC/launcher.c must also be updated.
305306
306307
*/
307308

308-
#define PYC_MAGIC_NUMBER 3661
309+
#define PYC_MAGIC_NUMBER 3662
309310
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
310311
(little-endian) and then appending b'\r\n'. */
311312
#define PYC_MAGIC_NUMBER_TOKEN \

Include/internal/pycore_opcode_metadata.h

Lines changed: 14 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_optimizer.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ typedef struct {
128128
bool cold;
129129
uint8_t pending_deletion;
130130
int32_t index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
131-
_PyBloomFilter bloom;
132-
_PyExecutorLinkListNode links;
131+
int32_t bloom_array_idx; // Index in interp->executor_blooms/executor_ptrs.
132+
_PyExecutorLinkListNode links; // Used by deletion list.
133133
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
134134
} _PyVMData;
135135

@@ -157,7 +157,7 @@ typedef struct _PyExecutorObject {
157157
// Export for '_opcode' shared extension (JIT compiler).
158158
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
159159

160-
void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
160+
int _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
161161
void _Py_ExecutorDetach(_PyExecutorObject *);
162162
void _Py_BloomFilter_Init(_PyBloomFilter *);
163163
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
@@ -361,6 +361,8 @@ _PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame,
361361
int oparg, _PyExecutorObject *current_executor);
362362

363363
PyAPI_FUNC(void) _PyJit_FinalizeTracing(PyThreadState *tstate, int err);
364+
PyAPI_FUNC(bool) _PyJit_EnterExecutorShouldStopTracing(int og_opcode);
365+
364366
void _PyPrintExecutor(_PyExecutorObject *executor, const _PyUOpInstruction *marker);
365367
void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
366368

Include/internal/pycore_uop.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ typedef struct _PyUOpInstruction{
3636
} _PyUOpInstruction;
3737

3838
// This is the length of the trace we translate initially.
39-
#ifdef Py_DEBUG
39+
#if defined(Py_DEBUG) && defined(_Py_JIT)
4040
// With asserts, the stencils are a lot larger
4141
#define UOP_MAX_TRACE_LENGTH 1000
4242
#else

0 commit comments

Comments
 (0)