@@ -176,6 +176,18 @@ incorrect_keys(PyObject *obj, uint32_t version)
176176#define STACK_LEVEL () ((int)(stack_pointer - ctx->frame->stack))
177177#define STACK_SIZE () ((int)(ctx->frame->stack_len))
178178
179+ static inline int
180+ is_terminator_uop (const _PyUOpInstruction * uop )
181+ {
182+ int opcode = uop -> opcode ;
183+ return (
184+ opcode == _EXIT_TRACE ||
185+ opcode == _JUMP_TO_TOP ||
186+ opcode == _DYNAMIC_EXIT ||
187+ opcode == _DEOPT
188+ );
189+ }
190+
179191#define CURRENT_FRAME_IS_INIT_SHIM () (ctx->frame->code == ((PyCodeObject *)&_Py_InitCleanup))
180192
181193#define GETLOCAL (idx ) ((ctx->frame->locals[idx]))
@@ -185,6 +197,22 @@ incorrect_keys(PyObject *obj, uint32_t version)
185197 (INST)->oparg = ARG; \
186198 (INST)->operand0 = OPERAND;
187199
200+ #define ADD_OP (OP , ARG , OPERAND ) add_op(ctx, this_instr, (OP), (ARG), (OPERAND))
201+
202+ static inline void
203+ add_op (JitOptContext * ctx , _PyUOpInstruction * this_instr ,
204+ uint16_t opcode , uint16_t oparg , uintptr_t operand0 )
205+ {
206+ _PyUOpInstruction * out = & ctx -> out_buffer [ctx -> out_len ];
207+ out -> opcode = (opcode );
208+ out -> format = this_instr -> format ;
209+ out -> oparg = (oparg );
210+ out -> target = this_instr -> target ;
211+ out -> operand0 = (operand0 );
212+ out -> operand1 = this_instr -> operand1 ;
213+ ctx -> out_len ++ ;
214+ }
215+
188216/* Shortened forms for convenience, used in optimizer_bytecodes.c */
189217#define sym_is_not_null _Py_uop_sym_is_not_null
190218#define sym_is_const _Py_uop_sym_is_const
@@ -252,7 +280,7 @@ optimize_to_bool(
252280 bool insert_mode )
253281{
254282 if (sym_matches_type (value , & PyBool_Type )) {
255- REPLACE_OP ( this_instr , _NOP , 0 , 0 );
283+ ADD_OP ( _NOP , 0 , 0 );
256284 * result_ptr = value ;
257285 return 1 ;
258286 }
@@ -262,17 +290,17 @@ optimize_to_bool(
262290 int opcode = insert_mode ?
263291 _INSERT_1_LOAD_CONST_INLINE_BORROW :
264292 _POP_TOP_LOAD_CONST_INLINE_BORROW ;
265- REPLACE_OP ( this_instr , opcode , 0 , (uintptr_t )load );
293+ ADD_OP ( opcode , 0 , (uintptr_t )load );
266294 * result_ptr = sym_new_const (ctx , load );
267295 return 1 ;
268296 }
269297 return 0 ;
270298}
271299
272300static void
273- eliminate_pop_guard (_PyUOpInstruction * this_instr , bool exit )
301+ eliminate_pop_guard (_PyUOpInstruction * this_instr , JitOptContext * ctx , bool exit )
274302{
275- REPLACE_OP ( this_instr , _POP_TOP , 0 , 0 );
303+ ADD_OP ( _POP_TOP , 0 , 0 );
276304 if (exit ) {
277305 REPLACE_OP ((this_instr + 1 ), _EXIT_TRACE , 0 , 0 );
278306 this_instr [1 ].target = this_instr -> target ;
@@ -289,7 +317,7 @@ lookup_attr(JitOptContext *ctx, _PyBloomFilter *dependencies, _PyUOpInstruction
289317 PyObject * lookup = _PyType_Lookup (type , name );
290318 if (lookup ) {
291319 int opcode = _Py_IsImmortal (lookup ) ? immortal : mortal ;
292- REPLACE_OP ( this_instr , opcode , 0 , (uintptr_t )lookup );
320+ ADD_OP ( opcode , 0 , (uintptr_t )lookup );
293321 PyType_Watch (TYPE_WATCHER_ID , (PyObject * )type );
294322 _Py_BloomFilter_Add (dependencies , type );
295323 return sym_new_const (ctx , lookup );
@@ -382,6 +410,8 @@ optimize_uops(
382410 JitOptContext * ctx = & tstate -> jit_tracer_state -> opt_context ;
383411 uint32_t opcode = UINT16_MAX ;
384412
413+ ctx -> out_buffer = tstate -> jit_tracer_state -> out_buffer ;
414+
385415 // Make sure that watchers are set up
386416 PyInterpreterState * interp = _PyInterpreterState_GET ();
387417 if (interp -> dict_state .watchers [GLOBALS_WATCHER_ID ] == NULL ) {
@@ -398,6 +428,8 @@ optimize_uops(
398428 ctx -> curr_frame_depth ++ ;
399429 ctx -> frame = frame ;
400430
431+ ctx -> out_len = 0 ;
432+
401433 _PyUOpInstruction * this_instr = NULL ;
402434 JitOptRef * stack_pointer = ctx -> frame -> stack_pointer ;
403435
@@ -431,6 +463,10 @@ optimize_uops(
431463 DPRINTF (1 , "\nUnknown opcode in abstract interpreter\n" );
432464 Py_UNREACHABLE ();
433465 }
466+ // If no ADD_OP was called during this iteration, copy the original instruction
467+ if (ctx -> out_len == i ) {
468+ ctx -> out_buffer [ctx -> out_len ++ ] = * this_instr ;
469+ }
434470 assert (ctx -> frame != NULL );
435471 if (!CURRENT_FRAME_IS_INIT_SHIM ()) {
436472 DPRINTF (3 , " stack_level %d\n" , STACK_LEVEL ());
@@ -459,7 +495,21 @@ optimize_uops(
459495 /* Either reached the end or cannot optimize further, but there
460496 * would be no benefit in retrying later */
461497 _Py_uop_abstractcontext_fini (ctx );
462- return trace_len ;
498+ // Check that the trace ends with a proper terminator
499+ if (ctx -> out_len > 0 ) {
500+ _PyUOpInstruction * last_uop = & ctx -> out_buffer [ctx -> out_len - 1 ];
501+ if (!is_terminator_uop (last_uop )) {
502+ // Copy remaining uops from original trace until we find a terminator
503+ for (int i = ctx -> out_len ; i < trace_len ; i ++ ) {
504+ ctx -> out_buffer [ctx -> out_len ++ ] = trace [i ];
505+ if (is_terminator_uop (& trace [i ])) {
506+ break ;
507+ }
508+ }
509+ }
510+ }
511+
512+ return ctx -> out_len ;
463513
464514error :
465515 DPRINTF (3 , "\n" );
@@ -631,7 +681,7 @@ _Py_uop_analyze_and_optimize(
631681
632682 assert (length > 0 );
633683
634- length = remove_unneeded_uops (buffer , length );
684+ length = remove_unneeded_uops (tstate -> jit_tracer_state -> out_buffer , length );
635685 assert (length > 0 );
636686
637687 OPT_STAT_INC (optimizer_successes );
0 commit comments