Skip to content

Commit ae53da5

Browse files
authored
GH-143493: Conform to spec for generator expressions while supporting virtual iterators (GH-143569)
* Moves the `GET_ITER` instruction into the generator function preamble. This means the iterable is converted into an iterator during generator creation, as documented, while keeping it in the same code object, which allows optimization.
1 parent c461aa9 commit ae53da5

File tree

14 files changed

+117
-88
lines changed

14 files changed

+117
-88
lines changed

Include/internal/pycore_flowgraph.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ int _PyCfg_OptimizeCodeUnit(struct _PyCfgBuilder *g, PyObject *consts, PyObject
2727
struct _PyCfgBuilder* _PyCfg_FromInstructionSequence(_PyInstructionSequence *seq);
2828
int _PyCfg_ToInstructionSequence(struct _PyCfgBuilder *g, _PyInstructionSequence *seq);
2929
int _PyCfg_OptimizedCfgToInstructionSequence(struct _PyCfgBuilder *g, _PyCompile_CodeUnitMetadata *umd,
30-
int code_flags, int *stackdepth, int *nlocalsplus,
30+
int *stackdepth, int *nlocalsplus,
3131
_PyInstructionSequence *seq);
3232

3333
PyCodeObject *

Include/internal/pycore_instruction_sequence.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ int _PyInstructionSequence_SetAnnotationsCode(_PyInstructionSequence *seq,
7373
_PyInstructionSequence *annotations);
7474
int _PyInstructionSequence_AddNested(_PyInstructionSequence *seq, _PyInstructionSequence *nested);
7575
void PyInstructionSequence_Fini(_PyInstructionSequence *seq);
76+
_PyInstruction _PyInstructionSequence_GetInstruction(_PyInstructionSequence *seq, int pos);
7677

7778
extern PyTypeObject _PyInstructionSequence_Type;
7879
#define _PyInstructionSequence_Check(v) Py_IS_TYPE((v), &_PyInstructionSequence_Type)

Include/internal/pycore_magic_number.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ Known values:
290290
Python 3.15a4 3657 (Add BINARY_OP_SUBSCR_USTR_INT)
291291
Python 3.15a4 3658 (Optimize bytecode for list/set called on genexp)
292292
Python 3.15a4 3659 (Add CALL_FUNCTION_EX specialization)
293+
Python 3.15a4 3660 (Change generator preamble code)
293294
294295
295296
Python 3.16 will start with 3700
@@ -303,7 +304,7 @@ PC/launcher.c must also be updated.
303304
304305
*/
305306

306-
#define PYC_MAGIC_NUMBER 3659
307+
#define PYC_MAGIC_NUMBER 3660
307308
/* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes
308309
(little-endian) and then appending b'\r\n'. */
309310
#define PYC_MAGIC_NUMBER_TOKEN \

Lib/test/test_compile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1298,7 +1298,7 @@ def return_genexp():
12981298
x
12991299
in
13001300
y)
1301-
genexp_lines = [0, 4, 2, 0, 4]
1301+
genexp_lines = [4, 0, 4, 2, 0, 4]
13021302

13031303
genexp_code = return_genexp.__code__.co_consts[0]
13041304
code_lines = self.get_code_lines(genexp_code)

Lib/test/test_dis.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -875,11 +875,11 @@ def foo(x):
875875
Disassembly of <code object <genexpr> at 0x..., file "%s", line %d>:
876876
-- COPY_FREE_VARS 1
877877
878-
%4d RETURN_GENERATOR
878+
%4d LOAD_FAST 0 (.0)
879+
GET_ITER
880+
RETURN_GENERATOR
879881
POP_TOP
880882
L1: RESUME 0
881-
LOAD_FAST 0 (.0)
882-
GET_ITER
883883
L2: FOR_ITER 14 (to L3)
884884
STORE_FAST 1 (z)
885885
LOAD_DEREF 2 (x)
@@ -897,7 +897,7 @@ def foo(x):
897897
-- L4: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR)
898898
RERAISE 1
899899
ExceptionTable:
900-
L1 to L4 -> L4 [0] lasti
900+
L1 to L4 -> L4 [2] lasti
901901
""" % (dis_nested_1,
902902
__file__,
903903
_h.__code__.co_firstlineno + 3,

Lib/test/test_generators.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -357,21 +357,25 @@ def gen(it):
357357
yield x
358358
return gen(range(10))
359359

360-
def process_tests(self, get_generator):
361-
for obj in self.iterables:
362-
g_obj = get_generator(obj)
363-
with self.subTest(g_obj=g_obj, obj=obj):
364-
self.assertListEqual(list(g_obj), list(obj))
360+
def process_tests(self, get_generator, changes_iterable):
361+
if changes_iterable:
362+
for obj in self.iterables:
363+
g_obj = get_generator(obj)
364+
with self.subTest(g_obj=g_obj, obj=obj):
365+
self.assertListEqual(list(g_obj), list(obj))
365366

366-
g_iter = get_generator(iter(obj))
367-
with self.subTest(g_iter=g_iter, obj=obj):
368-
self.assertListEqual(list(g_iter), list(obj))
367+
g_iter = get_generator(iter(obj))
368+
with self.subTest(g_iter=g_iter, obj=obj):
369+
self.assertListEqual(list(g_iter), list(obj))
369370

370371
err_regex = "'.*' object is not iterable"
371372
for obj in self.non_iterables:
372373
g_obj = get_generator(obj)
373374
with self.subTest(g_obj=g_obj):
374-
self.assertRaisesRegex(TypeError, err_regex, list, g_obj)
375+
if changes_iterable:
376+
self.assertRaisesRegex(TypeError, err_regex, list, g_obj)
377+
else:
378+
next(g_obj)
375379

376380
def test_modify_f_locals(self):
377381
def modify_f_locals(g, local, obj):
@@ -384,22 +388,22 @@ def get_generator_genexpr(obj):
384388
def get_generator_genfunc(obj):
385389
return modify_f_locals(self.genfunc(), 'it', obj)
386390

387-
self.process_tests(get_generator_genexpr)
388-
self.process_tests(get_generator_genfunc)
391+
self.process_tests(get_generator_genexpr, False)
392+
self.process_tests(get_generator_genfunc, True)
389393

390394
def test_new_gen_from_gi_code(self):
391395
def new_gen_from_gi_code(g, obj):
392396
generator_func = types.FunctionType(g.gi_code, {})
393397
return generator_func(obj)
394398

395-
def get_generator_genexpr(obj):
396-
return new_gen_from_gi_code(self.genexpr(), obj)
399+
for obj in self.non_iterables:
400+
with self.assertRaises(TypeError):
401+
new_gen_from_gi_code(self.genexpr(), obj)
397402

398403
def get_generator_genfunc(obj):
399404
return new_gen_from_gi_code(self.genfunc(), obj)
400405

401-
self.process_tests(get_generator_genexpr)
402-
self.process_tests(get_generator_genfunc)
406+
self.process_tests(get_generator_genfunc, True)
403407

404408

405409
class ExceptionTest(unittest.TestCase):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Generator expressions in 3.15 now conform to the documented behavior when
2+
the iterable does not support iteration. This matches the behavior in 3.14
3+
and earlier.

Python/bytecodes.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3189,14 +3189,15 @@ dummy_func(
31893189
#ifdef Py_STATS
31903190
_Py_GatherStats_GetIter(iterable);
31913191
#endif
3192-
/* before: [obj]; after [getiter(obj)] */
31933192
PyTypeObject *tp = PyStackRef_TYPE(iterable);
31943193
if (tp == &PyTuple_Type || tp == &PyList_Type) {
3194+
/* Leave iterable on stack and push tagged 0 */
31953195
iter = iterable;
31963196
DEAD(iterable);
31973197
index_or_null = PyStackRef_TagInt(0);
31983198
}
31993199
else {
3200+
/* Pop iterable, and push iterator then NULL */
32003201
PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable));
32013202
PyStackRef_CLOSE(iterable);
32023203
ERROR_IF(iter_o == NULL);
@@ -5033,7 +5034,7 @@ dummy_func(
50335034
PyFunctionObject *func = (PyFunctionObject *)PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
50345035
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
50355036
ERROR_IF(gen == NULL);
5036-
assert(STACK_LEVEL() == 0);
5037+
assert(STACK_LEVEL() <= 2);
50375038
SAVE_STACK();
50385039
_PyInterpreterFrame *gen_frame = &gen->gi_iframe;
50395040
frame->instr_ptr++;

Python/codegen.c

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -227,19 +227,25 @@ static int codegen_call_helper(compiler *c, location loc,
227227
static int codegen_try_except(compiler *, stmt_ty);
228228
static int codegen_try_star_except(compiler *, stmt_ty);
229229

230+
typedef enum {
231+
ITERABLE_IN_LOCAL = 0,
232+
ITERABLE_ON_STACK = 1,
233+
ITERATOR_ON_STACK = 2,
234+
} IterStackPosition;
235+
230236
static int codegen_sync_comprehension_generator(
231237
compiler *c, location loc,
232238
asdl_comprehension_seq *generators, int gen_index,
233239
int depth,
234240
expr_ty elt, expr_ty val, int type,
235-
int iter_on_stack);
241+
IterStackPosition iter_pos);
236242

237243
static int codegen_async_comprehension_generator(
238244
compiler *c, location loc,
239245
asdl_comprehension_seq *generators, int gen_index,
240246
int depth,
241247
expr_ty elt, expr_ty val, int type,
242-
int iter_on_stack);
248+
IterStackPosition iter_pos);
243249

244250
static int codegen_pattern(compiler *, pattern_ty, pattern_context *);
245251
static int codegen_match(compiler *, stmt_ty);
@@ -665,6 +671,18 @@ codegen_enter_scope(compiler *c, identifier name, int scope_type,
665671
if (scope_type == COMPILE_SCOPE_MODULE) {
666672
loc.lineno = 0;
667673
}
674+
/* Add the generator prefix instructions. */
675+
676+
PySTEntryObject *ste = SYMTABLE_ENTRY(c);
677+
if (ste->ste_coroutine || ste->ste_generator) {
678+
/* Note that RETURN_GENERATOR + POP_TOP have a net stack effect
679+
* of 0. This is because RETURN_GENERATOR pushes the generator
680+
before returning. */
681+
location loc = LOCATION(lineno, lineno, -1, -1);
682+
ADDOP(c, loc, RETURN_GENERATOR);
683+
ADDOP(c, loc, POP_TOP);
684+
}
685+
668686
ADDOP_I(c, loc, RESUME, RESUME_AT_FUNC_START);
669687
if (scope_type == COMPILE_SCOPE_MODULE) {
670688
ADDOP(c, loc, ANNOTATIONS_PLACEHOLDER);
@@ -1187,10 +1205,15 @@ codegen_wrap_in_stopiteration_handler(compiler *c)
11871205
{
11881206
NEW_JUMP_TARGET_LABEL(c, handler);
11891207

1190-
/* Insert SETUP_CLEANUP at start */
1208+
/* Insert SETUP_CLEANUP just before RESUME */
1209+
instr_sequence *seq = INSTR_SEQUENCE(c);
1210+
int resume = 0;
1211+
while (_PyInstructionSequence_GetInstruction(seq, resume).i_opcode != RESUME) {
1212+
resume++;
1213+
}
11911214
RETURN_IF_ERROR(
11921215
_PyInstructionSequence_InsertInstruction(
1193-
INSTR_SEQUENCE(c), 0,
1216+
seq, resume,
11941217
SETUP_CLEANUP, handler.id, NO_LOCATION));
11951218

11961219
ADDOP_LOAD_CONST(c, NO_LOCATION, Py_None);
@@ -4401,18 +4424,18 @@ codegen_comprehension_generator(compiler *c, location loc,
44014424
asdl_comprehension_seq *generators, int gen_index,
44024425
int depth,
44034426
expr_ty elt, expr_ty val, int type,
4404-
int iter_on_stack)
4427+
IterStackPosition iter_pos)
44054428
{
44064429
comprehension_ty gen;
44074430
gen = (comprehension_ty)asdl_seq_GET(generators, gen_index);
44084431
if (gen->is_async) {
44094432
return codegen_async_comprehension_generator(
44104433
c, loc, generators, gen_index, depth, elt, val, type,
4411-
iter_on_stack);
4434+
iter_pos);
44124435
} else {
44134436
return codegen_sync_comprehension_generator(
44144437
c, loc, generators, gen_index, depth, elt, val, type,
4415-
iter_on_stack);
4438+
iter_pos);
44164439
}
44174440
}
44184441

@@ -4421,7 +4444,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
44214444
asdl_comprehension_seq *generators,
44224445
int gen_index, int depth,
44234446
expr_ty elt, expr_ty val, int type,
4424-
int iter_on_stack)
4447+
IterStackPosition iter_pos)
44254448
{
44264449
/* generate code for the iterator, then each of the ifs,
44274450
and then write to the element */
@@ -4433,7 +4456,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
44334456
comprehension_ty gen = (comprehension_ty)asdl_seq_GET(generators,
44344457
gen_index);
44354458

4436-
if (!iter_on_stack) {
4459+
if (iter_pos == ITERABLE_IN_LOCAL) {
44374460
if (gen_index == 0) {
44384461
assert(METADATA(c)->u_argcount == 1);
44394462
ADDOP_I(c, loc, LOAD_FAST, 0);
@@ -4468,9 +4491,12 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
44684491
}
44694492

44704493
if (IS_JUMP_TARGET_LABEL(start)) {
4471-
depth += 2;
4472-
ADDOP(c, LOC(gen->iter), GET_ITER);
4494+
if (iter_pos != ITERATOR_ON_STACK) {
4495+
ADDOP(c, LOC(gen->iter), GET_ITER);
4496+
depth += 1;
4497+
}
44734498
USE_LABEL(c, start);
4499+
depth += 1;
44744500
ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor);
44754501
}
44764502
VISIT(c, expr, gen->target);
@@ -4486,7 +4512,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc,
44864512
RETURN_IF_ERROR(
44874513
codegen_comprehension_generator(c, loc,
44884514
generators, gen_index, depth,
4489-
elt, val, type, 0));
4515+
elt, val, type, ITERABLE_IN_LOCAL));
44904516
}
44914517

44924518
location elt_loc = LOC(elt);
@@ -4545,7 +4571,7 @@ codegen_async_comprehension_generator(compiler *c, location loc,
45454571
asdl_comprehension_seq *generators,
45464572
int gen_index, int depth,
45474573
expr_ty elt, expr_ty val, int type,
4548-
int iter_on_stack)
4574+
IterStackPosition iter_pos)
45494575
{
45504576
NEW_JUMP_TARGET_LABEL(c, start);
45514577
NEW_JUMP_TARGET_LABEL(c, send);
@@ -4555,7 +4581,7 @@ codegen_async_comprehension_generator(compiler *c, location loc,
45554581
comprehension_ty gen = (comprehension_ty)asdl_seq_GET(generators,
45564582
gen_index);
45574583

4558-
if (!iter_on_stack) {
4584+
if (iter_pos == ITERABLE_IN_LOCAL) {
45594585
if (gen_index == 0) {
45604586
assert(METADATA(c)->u_argcount == 1);
45614587
ADDOP_I(c, loc, LOAD_FAST, 0);
@@ -4565,7 +4591,9 @@ codegen_async_comprehension_generator(compiler *c, location loc,
45654591
VISIT(c, expr, gen->iter);
45664592
}
45674593
}
4568-
ADDOP(c, LOC(gen->iter), GET_AITER);
4594+
if (iter_pos != ITERATOR_ON_STACK) {
4595+
ADDOP(c, LOC(gen->iter), GET_AITER);
4596+
}
45694597

45704598
USE_LABEL(c, start);
45714599
/* Runtime will push a block here, so we need to account for that */
@@ -4795,11 +4823,13 @@ codegen_comprehension(compiler *c, expr_ty e, int type,
47954823
location loc = LOC(e);
47964824

47974825
outermost = (comprehension_ty) asdl_seq_GET(generators, 0);
4826+
IterStackPosition iter_state;
47984827
if (is_inlined) {
47994828
VISIT(c, expr, outermost->iter);
48004829
if (push_inlined_comprehension_state(c, loc, entry, &inline_state)) {
48014830
goto error;
48024831
}
4832+
iter_state = ITERABLE_ON_STACK;
48034833
}
48044834
else {
48054835
/* Receive outermost iter as an implicit argument */
@@ -4810,6 +4840,23 @@ codegen_comprehension(compiler *c, expr_ty e, int type,
48104840
(void *)e, e->lineno, NULL, &umd) < 0) {
48114841
goto error;
48124842
}
4843+
if (type == COMP_GENEXP) {
4844+
/* Insert GET_ITER before RETURN_GENERATOR.
4845+
https://docs.python.org/3/reference/expressions.html#generator-expressions */
4846+
RETURN_IF_ERROR(
4847+
_PyInstructionSequence_InsertInstruction(
4848+
INSTR_SEQUENCE(c), 0,
4849+
LOAD_FAST, 0, LOC(outermost->iter)));
4850+
RETURN_IF_ERROR(
4851+
_PyInstructionSequence_InsertInstruction(
4852+
INSTR_SEQUENCE(c), 1,
4853+
outermost->is_async ? GET_AITER : GET_ITER,
4854+
0, LOC(outermost->iter)));
4855+
iter_state = ITERATOR_ON_STACK;
4856+
}
4857+
else {
4858+
iter_state = ITERABLE_IN_LOCAL;
4859+
}
48134860
}
48144861
Py_CLEAR(entry);
48154862

@@ -4836,9 +4883,8 @@ codegen_comprehension(compiler *c, expr_ty e, int type,
48364883
ADDOP_I(c, loc, SWAP, 2);
48374884
}
48384885
}
4839-
48404886
if (codegen_comprehension_generator(c, loc, generators, 0, 0,
4841-
elt, val, type, is_inlined) < 0) {
4887+
elt, val, type, iter_state) < 0) {
48424888
goto error_in_scope;
48434889
}
48444890

Python/compile.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,7 +1443,7 @@ optimize_and_assemble_code_unit(struct compiler_unit *u, PyObject *const_cache,
14431443

14441444
int stackdepth;
14451445
int nlocalsplus;
1446-
if (_PyCfg_OptimizedCfgToInstructionSequence(g, &u->u_metadata, code_flags,
1446+
if (_PyCfg_OptimizedCfgToInstructionSequence(g, &u->u_metadata,
14471447
&stackdepth, &nlocalsplus,
14481448
&optimized_instrs) < 0) {
14491449
goto error;
@@ -1718,7 +1718,7 @@ _PyCompile_Assemble(_PyCompile_CodeUnitMetadata *umd, PyObject *filename,
17181718

17191719
int code_flags = 0;
17201720
int stackdepth, nlocalsplus;
1721-
if (_PyCfg_OptimizedCfgToInstructionSequence(g, umd, code_flags,
1721+
if (_PyCfg_OptimizedCfgToInstructionSequence(g, umd,
17221722
&stackdepth, &nlocalsplus,
17231723
&optimized_instrs) < 0) {
17241724
goto error;

0 commit comments

Comments
 (0)