Skip to content

Commit 568a819

Browse files
authored
gh-138122: Validate base frame before caching in remote debugging frame cache (#142852)
1 parent 2b466c4 commit 568a819

File tree

6 files changed

+152
-17
lines changed

6 files changed

+152
-17
lines changed

Lib/test/test_profiling/test_sampling_profiler/test_integration.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self):
863863
self.assertGreater(cpu_percentage, 90.0,
864864
f"cpu_leaf should dominate samples in 'running' mode, "
865865
f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
866+
867+
868+
def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
869+
"""Generate a script with deep nested generators for stress testing."""
870+
lines = [
871+
'import sys',
872+
'sys.setrecursionlimit(5000)',
873+
'',
874+
]
875+
# Generate chain of yield-from functions
876+
for i in range(chain_depth - 1):
877+
lines.extend([
878+
f'def deep_yield_chain_{i}(n):',
879+
f' yield ("L{i}", n)',
880+
f' yield from deep_yield_chain_{i + 1}(n)',
881+
'',
882+
])
883+
# Last chain function calls recursive_diver
884+
lines.extend([
885+
f'def deep_yield_chain_{chain_depth - 1}(n):',
886+
f' yield ("L{chain_depth - 1}", n)',
887+
f' yield from recursive_diver(n, {chain_depth})',
888+
'',
889+
'def recursive_diver(n, depth):',
890+
' yield (f"DIVE_{depth}", n)',
891+
f' if depth < {recurse_depth}:',
892+
' yield from recursive_diver(n, depth + 1)',
893+
' else:',
894+
' for i in range(5):',
895+
' yield (f"BOTTOM_{depth}", i)',
896+
'',
897+
'def oscillating_generator(iterations=1000):',
898+
' for i in range(iterations):',
899+
' yield ("OSCILLATE", i)',
900+
' yield from deep_yield_chain_0(i)',
901+
'',
902+
'def run_forever():',
903+
' while True:',
904+
' for _ in oscillating_generator(10):',
905+
' pass',
906+
'',
907+
'_test_sock.sendall(b"working")',
908+
'run_forever()',
909+
])
910+
return '\n'.join(lines)
911+
912+
913+
@requires_remote_subprocess_debugging()
914+
class TestDeepGeneratorFrameCache(unittest.TestCase):
915+
"""Test frame cache consistency with deep oscillating generator stacks."""
916+
917+
def test_all_stacks_share_same_base_frame(self):
918+
"""Verify all sampled stacks reach the entry point function.
919+
920+
When profiling deep generators that oscillate up and down the call
921+
stack, every sample should include the entry point function
922+
(run_forever) in its call chain. If the frame cache stores
923+
incomplete stacks, some samples will be missing this base function,
924+
causing broken flamegraphs.
925+
"""
926+
script = _generate_deep_generators_script()
927+
with test_subprocess(script, wait_for_working=True) as subproc:
928+
collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False)
929+
930+
with (
931+
io.StringIO() as captured_output,
932+
mock.patch("sys.stdout", captured_output),
933+
):
934+
profiling.sampling.sample.sample(
935+
subproc.process.pid,
936+
collector,
937+
duration_sec=2,
938+
)
939+
940+
samples_with_entry_point = 0
941+
samples_without_entry_point = 0
942+
total_samples = 0
943+
944+
for (call_tree, _thread_id), count in collector.stack_counter.items():
945+
total_samples += count
946+
if call_tree:
947+
has_entry_point = call_tree and call_tree[0][2] == "<module>"
948+
if has_entry_point:
949+
samples_with_entry_point += count
950+
else:
951+
samples_without_entry_point += count
952+
953+
self.assertGreater(total_samples, 100,
954+
f"Expected at least 100 samples, got {total_samples}")
955+
956+
self.assertEqual(samples_without_entry_point, 0,
957+
f"Found {samples_without_entry_point}/{total_samples} samples "
958+
f"missing the entry point function 'run_forever'. This indicates "
959+
f"incomplete stacks are being returned, likely due to frame cache "
960+
f"storing partial stack traces.")
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix incomplete stack traces in the Tachyon profiler's frame cache when
2+
profiling code with deeply nested generators. The frame cache now validates
3+
that stack traces reach the base frame before caching, preventing broken
4+
flamegraphs. Patch by Pablo Galindo.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,8 @@ extern int process_frame_chain(
429429
int *stopped_at_cached_frame,
430430
uintptr_t *frame_addrs,
431431
Py_ssize_t *num_addrs,
432-
Py_ssize_t max_addrs
432+
Py_ssize_t max_addrs,
433+
uintptr_t *out_last_frame_addr
433434
);
434435

435436
/* Frame cache functions */
@@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend(
447448
Py_ssize_t *num_addrs,
448449
Py_ssize_t max_addrs);
449450
// Returns: 1 = stored, 0 = not stored (graceful), -1 = error
451+
// Only stores complete stacks that reach base_frame_addr
450452
extern int frame_cache_store(
451453
RemoteUnwinderObject *unwinder,
452454
uint64_t thread_id,
453455
PyObject *frame_list,
454456
const uintptr_t *addrs,
455-
Py_ssize_t num_addrs);
457+
Py_ssize_t num_addrs,
458+
uintptr_t base_frame_addr,
459+
uintptr_t last_frame_visited);
456460

457461
extern int collect_frames_with_cache(
458462
RemoteUnwinderObject *unwinder,
459463
uintptr_t frame_addr,
460464
StackChunkList *chunks,
461465
PyObject *frame_info,
466+
uintptr_t base_frame_addr,
462467
uintptr_t gc_frame,
463468
uintptr_t last_profiled_frame,
464469
uint64_t thread_id);

Modules/_remote_debugging/frame_cache.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,19 +194,33 @@ frame_cache_lookup_and_extend(
194194
}
195195

196196
// Store frame list with addresses in cache
197+
// Only stores complete stacks that reach base_frame_addr (validation done internally)
197198
// Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error
198199
int
199200
frame_cache_store(
200201
RemoteUnwinderObject *unwinder,
201202
uint64_t thread_id,
202203
PyObject *frame_list,
203204
const uintptr_t *addrs,
204-
Py_ssize_t num_addrs)
205+
Py_ssize_t num_addrs,
206+
uintptr_t base_frame_addr,
207+
uintptr_t last_frame_visited)
205208
{
206209
if (!unwinder->frame_cache || thread_id == 0) {
207210
return 0;
208211
}
209212

213+
// Validate we have a complete stack before caching.
214+
// Only cache if last_frame_visited matches base_frame_addr (the sentinel
215+
// at the bottom of the stack). Note: we use last_frame_visited rather than
216+
// addrs[num_addrs-1] because the base frame is visited but not added to the
217+
// addrs array (it returns frame==NULL from is_frame_valid due to
218+
// owner==FRAME_OWNED_BY_INTERPRETER).
219+
if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
220+
// Incomplete stack - don't cache (graceful degradation)
221+
return 0;
222+
}
223+
210224
// Clamp to max frames
211225
if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
212226
num_addrs = FRAME_CACHE_MAX_FRAMES;

Modules/_remote_debugging/frames.c

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -265,18 +265,22 @@ process_frame_chain(
265265
int *stopped_at_cached_frame,
266266
uintptr_t *frame_addrs, // optional: C array to receive frame addresses
267267
Py_ssize_t *num_addrs, // in/out: current count / updated count
268-
Py_ssize_t max_addrs) // max capacity of frame_addrs array
268+
Py_ssize_t max_addrs, // max capacity of frame_addrs array
269+
uintptr_t *out_last_frame_addr) // optional: receives last frame address visited
269270
{
270271
uintptr_t frame_addr = initial_frame_addr;
271272
uintptr_t prev_frame_addr = 0;
272273
uintptr_t last_frame_addr = 0; // Track last frame visited for validation
273274
const size_t MAX_FRAMES = 1024 + 512;
274275
size_t frame_count = 0;
275276

276-
// Initialize output flag
277+
// Initialize output parameters
277278
if (stopped_at_cached_frame) {
278279
*stopped_at_cached_frame = 0;
279280
}
281+
if (out_last_frame_addr) {
282+
*out_last_frame_addr = 0;
283+
}
280284

281285
// Quick check: if current_frame == last_profiled_frame, entire stack is unchanged
282286
if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) {
@@ -390,6 +394,11 @@ process_frame_chain(
390394
return -1;
391395
}
392396

397+
// Set output parameter for caller (needed for cache validation)
398+
if (out_last_frame_addr) {
399+
*out_last_frame_addr = last_frame_addr;
400+
}
401+
393402
return 0;
394403
}
395404

@@ -537,6 +546,7 @@ collect_frames_with_cache(
537546
uintptr_t frame_addr,
538547
StackChunkList *chunks,
539548
PyObject *frame_info,
549+
uintptr_t base_frame_addr,
540550
uintptr_t gc_frame,
541551
uintptr_t last_profiled_frame,
542552
uint64_t thread_id)
@@ -551,11 +561,13 @@ collect_frames_with_cache(
551561
uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
552562
Py_ssize_t num_addrs = 0;
553563
Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
564+
uintptr_t last_frame_visited = 0;
554565

555566
int stopped_at_cached = 0;
556-
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
567+
if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
557568
last_profiled_frame, &stopped_at_cached,
558-
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
569+
addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
570+
&last_frame_visited) < 0) {
559571
return -1;
560572
}
561573

@@ -575,23 +587,28 @@ collect_frames_with_cache(
575587
// Cache miss - continue walking from last_profiled_frame to get the rest
576588
STATS_INC(unwinder, frame_cache_misses);
577589
Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
578-
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
579-
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
590+
if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
591+
0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
592+
&last_frame_visited) < 0) {
580593
return -1;
581594
}
582595
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
583596
} else {
584-
// Partial cache hit
597+
// Partial cache hit - cache was validated when stored, so we trust it
585598
STATS_INC(unwinder, frame_cache_partial_hits);
586599
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
587600
}
588-
} else if (last_profiled_frame == 0) {
589-
// No cache involvement (no last_profiled_frame or cache disabled)
590-
STATS_INC(unwinder, frame_cache_misses);
601+
} else {
602+
if (last_profiled_frame == 0) {
603+
// No cache involvement (no last_profiled_frame or cache disabled)
604+
STATS_INC(unwinder, frame_cache_misses);
605+
}
591606
}
592607

593-
// Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES)
594-
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
608+
// Store in cache - frame_cache_store validates internally that we have a
609+
// complete stack (reached base_frame_addr) before actually storing
610+
if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
611+
base_frame_addr, last_frame_visited) < 0) {
595612
return -1;
596613
}
597614

Modules/_remote_debugging/threads.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ unwind_stack_for_thread(
430430
uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
431431
unwinder->debug_offsets.thread_state.last_profiled_frame);
432432
if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info,
433-
gc_frame, last_profiled_frame, tid) < 0) {
433+
base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) {
434434
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames");
435435
goto error;
436436
}
@@ -444,7 +444,7 @@ unwind_stack_for_thread(
444444
} else {
445445
// No caching - process entire frame chain with base_frame validation
446446
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
447-
base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) {
447+
base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) {
448448
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
449449
goto error;
450450
}

0 commit comments

Comments
 (0)