Skip to content

Commit b4b7324

Browse files
authored
gh-143421: Use new buffer to save optimized uops (GH-143682)
1 parent d51c01a commit b4b7324

File tree

7 files changed

+193
-139
lines changed

7 files changed

+193
-139
lines changed

Include/internal/pycore_optimizer_types.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,8 @@ typedef struct _JitOptContext {
129129
JitOptRef *n_consumed;
130130
JitOptRef *limit;
131131
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
132+
_PyUOpInstruction *out_buffer;
133+
int out_len;
132134
} JitOptContext;
133135

134136

Include/internal/pycore_tstate.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ typedef struct _PyJitTracerState {
6060
_PyJitTracerTranslatorState translator_state;
6161
JitOptContext opt_context;
6262
_PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
63+
_PyUOpInstruction out_buffer[UOP_MAX_TRACE_LENGTH];
6364
} _PyJitTracerState;
6465

6566
#endif

Python/optimizer.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1535,6 +1535,7 @@ uop_optimize(
15351535
if (length <= 0) {
15361536
return length;
15371537
}
1538+
buffer = _tstate->jit_tracer_state->out_buffer;
15381539
}
15391540
assert(length < UOP_MAX_TRACE_LENGTH/2);
15401541
assert(length >= 1);

Python/optimizer_analysis.c

Lines changed: 57 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,18 @@ incorrect_keys(PyObject *obj, uint32_t version)
176176
#define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack))
177177
#define STACK_SIZE() ((int)(ctx->frame->stack_len))
178178

179+
static inline int
180+
is_terminator_uop(const _PyUOpInstruction *uop)
181+
{
182+
int opcode = uop->opcode;
183+
return (
184+
opcode == _EXIT_TRACE ||
185+
opcode == _JUMP_TO_TOP ||
186+
opcode == _DYNAMIC_EXIT ||
187+
opcode == _DEOPT
188+
);
189+
}
190+
179191
#define CURRENT_FRAME_IS_INIT_SHIM() (ctx->frame->code == ((PyCodeObject *)&_Py_InitCleanup))
180192

181193
#define GETLOCAL(idx) ((ctx->frame->locals[idx]))
@@ -185,6 +197,22 @@ incorrect_keys(PyObject *obj, uint32_t version)
185197
(INST)->oparg = ARG; \
186198
(INST)->operand0 = OPERAND;
187199

200+
/* Shorthand for add_op(): appends a uop built from (OP, ARG, OPERAND) to the
 * output buffer. The expansion names `ctx` and `this_instr` directly, so both
 * identifiers must be in scope wherever this macro is used. */
#define ADD_OP(OP, ARG, OPERAND) add_op(ctx, this_instr, (OP), (ARG), (OPERAND))
201+
202+
static inline void
203+
add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
204+
uint16_t opcode, uint16_t oparg, uintptr_t operand0)
205+
{
206+
_PyUOpInstruction *out = &ctx->out_buffer[ctx->out_len];
207+
out->opcode = (opcode);
208+
out->format = this_instr->format;
209+
out->oparg = (oparg);
210+
out->target = this_instr->target;
211+
out->operand0 = (operand0);
212+
out->operand1 = this_instr->operand1;
213+
ctx->out_len++;
214+
}
215+
188216
/* Shortened forms for convenience, used in optimizer_bytecodes.c */
189217
#define sym_is_not_null _Py_uop_sym_is_not_null
190218
#define sym_is_const _Py_uop_sym_is_const
@@ -252,7 +280,7 @@ optimize_to_bool(
252280
bool insert_mode)
253281
{
254282
if (sym_matches_type(value, &PyBool_Type)) {
255-
REPLACE_OP(this_instr, _NOP, 0, 0);
283+
ADD_OP(_NOP, 0, 0);
256284
*result_ptr = value;
257285
return 1;
258286
}
@@ -262,17 +290,17 @@ optimize_to_bool(
262290
int opcode = insert_mode ?
263291
_INSERT_1_LOAD_CONST_INLINE_BORROW :
264292
_POP_TOP_LOAD_CONST_INLINE_BORROW;
265-
REPLACE_OP(this_instr, opcode, 0, (uintptr_t)load);
293+
ADD_OP(opcode, 0, (uintptr_t)load);
266294
*result_ptr = sym_new_const(ctx, load);
267295
return 1;
268296
}
269297
return 0;
270298
}
271299

272300
static void
273-
eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
301+
eliminate_pop_guard(_PyUOpInstruction *this_instr, JitOptContext *ctx, bool exit)
274302
{
275-
REPLACE_OP(this_instr, _POP_TOP, 0, 0);
303+
ADD_OP(_POP_TOP, 0, 0);
276304
if (exit) {
277305
REPLACE_OP((this_instr+1), _EXIT_TRACE, 0, 0);
278306
this_instr[1].target = this_instr->target;
@@ -289,7 +317,7 @@ lookup_attr(JitOptContext *ctx, _PyBloomFilter *dependencies, _PyUOpInstruction
289317
PyObject *lookup = _PyType_Lookup(type, name);
290318
if (lookup) {
291319
int opcode = _Py_IsImmortal(lookup) ? immortal : mortal;
292-
REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup);
320+
ADD_OP(opcode, 0, (uintptr_t)lookup);
293321
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
294322
_Py_BloomFilter_Add(dependencies, type);
295323
return sym_new_const(ctx, lookup);
@@ -382,6 +410,8 @@ optimize_uops(
382410
JitOptContext *ctx = &tstate->jit_tracer_state->opt_context;
383411
uint32_t opcode = UINT16_MAX;
384412

413+
ctx->out_buffer = tstate->jit_tracer_state->out_buffer;
414+
385415
// Make sure that watchers are set up
386416
PyInterpreterState *interp = _PyInterpreterState_GET();
387417
if (interp->dict_state.watchers[GLOBALS_WATCHER_ID] == NULL) {
@@ -398,6 +428,8 @@ optimize_uops(
398428
ctx->curr_frame_depth++;
399429
ctx->frame = frame;
400430

431+
ctx->out_len = 0;
432+
401433
_PyUOpInstruction *this_instr = NULL;
402434
JitOptRef *stack_pointer = ctx->frame->stack_pointer;
403435

@@ -431,6 +463,10 @@ optimize_uops(
431463
DPRINTF(1, "\nUnknown opcode in abstract interpreter\n");
432464
Py_UNREACHABLE();
433465
}
466+
// If no ADD_OP was called during this iteration, copy the original instruction
467+
if (ctx->out_len == i) {
468+
ctx->out_buffer[ctx->out_len++] = *this_instr;
469+
}
434470
assert(ctx->frame != NULL);
435471
if (!CURRENT_FRAME_IS_INIT_SHIM()) {
436472
DPRINTF(3, " stack_level %d\n", STACK_LEVEL());
@@ -459,7 +495,21 @@ optimize_uops(
459495
/* Either reached the end or cannot optimize further, but there
460496
* would be no benefit in retrying later */
461497
_Py_uop_abstractcontext_fini(ctx);
462-
return trace_len;
498+
// Check that the trace ends with a proper terminator
499+
if (ctx->out_len > 0) {
500+
_PyUOpInstruction *last_uop = &ctx->out_buffer[ctx->out_len - 1];
501+
if (!is_terminator_uop(last_uop)) {
502+
// Copy remaining uops from original trace until we find a terminator
503+
for (int i = ctx->out_len; i < trace_len; i++) {
504+
ctx->out_buffer[ctx->out_len++] = trace[i];
505+
if (is_terminator_uop(&trace[i])) {
506+
break;
507+
}
508+
}
509+
}
510+
}
511+
512+
return ctx->out_len;
463513

464514
error:
465515
DPRINTF(3, "\n");
@@ -631,7 +681,7 @@ _Py_uop_analyze_and_optimize(
631681

632682
assert(length > 0);
633683

634-
length = remove_unneeded_uops(buffer, length);
684+
length = remove_unneeded_uops(tstate->jit_tracer_state->out_buffer, length);
635685
assert(length > 0);
636686

637687
OPT_STAT_INC(optimizer_successes);

0 commit comments

Comments
 (0)