Skip to content

Commit 888d101

Browse files
lkollar and pablogsal authored
gh-138122: Remove default duration for statistical profiling (#143174)
Co-authored-by: Pablo Galindo Salgado <pablogsal@gmail.com>
1 parent ea3fd78 commit 888d101

File tree

3 files changed

+33
-33
lines changed

3 files changed

+33
-33
lines changed

Doc/library/profiling.sampling.rst

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,8 @@ is unaware it is being profiled.
241241
When profiling production systems, keep these guidelines in mind:
242242

243243
Start with shorter durations (10-30 seconds) to get quick results, then extend
244-
if you need more statistical accuracy. The default 10-second duration is usually
245-
sufficient to identify major hotspots.
244+
if you need more statistical accuracy. By default, profiling runs until the
245+
target process completes, which is usually sufficient to identify major hotspots.
246246

247247
If possible, profile during representative load rather than peak traffic.
248248
Profiles collected during normal operation are easier to interpret than those
@@ -329,7 +329,7 @@ The default configuration works well for most use cases:
329329
* - Default for ``--sampling-rate`` / ``-r``
330330
- 1 kHz
331331
* - Default for ``--duration`` / ``-d``
332-
- 10 seconds
332+
- Run to completion
333333
* - Default for ``--all-threads`` / ``-a``
334334
- Main thread only
335335
* - Default for ``--native``
@@ -363,15 +363,14 @@ cost of slightly higher profiler CPU usage. Lower rates reduce profiler
363363
overhead but may miss short-lived functions. For most applications, the
364364
default rate provides a good balance between accuracy and overhead.
365365

366-
The :option:`--duration` option (:option:`-d`) sets how long to profile in seconds. The
367-
default is 10 seconds::
366+
The :option:`--duration` option (:option:`-d`) sets how long to profile in seconds. By
367+
default, profiling continues until the target process exits or is interrupted::
368368

369369
python -m profiling.sampling run -d 60 script.py
370370

371-
Longer durations collect more samples and produce more statistically reliable
372-
results, especially for code paths that execute infrequently. When profiling
373-
a program that runs for a fixed time, you may want to set the duration to
374-
match or exceed the expected runtime.
371+
Specifying a duration is useful when attaching to long-running processes or when
372+
you want to limit profiling to a specific time window. When profiling a script,
373+
the default behavior of running to completion is usually what you want.
375374

376375

377376
Thread selection
@@ -1394,7 +1393,7 @@ Sampling options
13941393

13951394
.. option:: -d <seconds>, --duration <seconds>
13961395

1397-
Profiling duration in seconds. Default: 10.
1396+
Profiling duration in seconds. Default: run to completion.
13981397

13991398
.. option:: -a, --all-threads
14001399

Lib/profiling/sampling/cli.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ def _build_child_profiler_args(args):
120120
# Sampling options
121121
hz = MICROSECONDS_PER_SECOND // args.sample_interval_usec
122122
child_args.extend(["-r", str(hz)])
123-
child_args.extend(["-d", str(args.duration)])
124-
123+
if args.duration is not None:
124+
child_args.extend(["-d", str(args.duration)])
125125
if args.all_threads:
126126
child_args.append("-a")
127127
if args.realtime_stats:
@@ -356,9 +356,9 @@ def _add_sampling_options(parser):
356356
"-d",
357357
"--duration",
358358
type=int,
359-
default=10,
359+
default=None,
360360
metavar="SECONDS",
361-
help="Sampling duration",
361+
help="Sampling duration (default: run to completion)",
362362
)
363363
sampling_group.add_argument(
364364
"-a",
@@ -562,7 +562,7 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
562562
if format_type == "binary":
563563
if output_file is None:
564564
raise ValueError("Binary format requires an output file")
565-
return collector_class(output_file, interval, skip_idle=skip_idle,
565+
return collector_class(output_file, sample_interval_usec, skip_idle=skip_idle,
566566
compression=compression)
567567

568568
# Gecko format never skips idle (it needs both GIL and CPU data)
@@ -643,11 +643,11 @@ def _validate_args(args, parser):
643643
return
644644

645645
# Warn about blocking mode with aggressive sampling intervals
646-
if args.blocking and args.interval < 100:
646+
if args.blocking and args.sample_interval_usec < 100:
647647
print(
648-
f"Warning: --blocking with a {args.interval} µs interval will stop all threads "
649-
f"{1_000_000 // args.interval} times per second. "
650-
"Consider using --interval 1000 or higher to reduce overhead.",
648+
f"Warning: --blocking with a {args.sample_interval_usec} µs interval will stop all threads "
649+
f"{1_000_000 // args.sample_interval_usec} times per second. "
650+
"Consider using --sampling-rate 1khz or lower to reduce overhead.",
651651
file=sys.stderr
652652
)
653653

@@ -1107,7 +1107,7 @@ def _handle_live_run(args):
11071107
if process.poll() is None:
11081108
process.terminate()
11091109
try:
1110-
process.wait(timeout=_PROCESS_KILL_TIMEOUT)
1110+
process.wait(timeout=_PROCESS_KILL_TIMEOUT_SEC)
11111111
except subprocess.TimeoutExpired:
11121112
process.kill()
11131113
process.wait()

Lib/profiling/sampling/sample.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,18 +76,18 @@ def _new_unwinder(self, native, gc, opcodes, skip_non_matching_threads):
7676
)
7777
return unwinder
7878

79-
def sample(self, collector, duration_sec=10, *, async_aware=False):
79+
def sample(self, collector, duration_sec=None, *, async_aware=False):
8080
sample_interval_sec = self.sample_interval_usec / 1_000_000
81-
running_time = 0
8281
num_samples = 0
8382
errors = 0
8483
interrupted = False
84+
running_time_sec = 0
8585
start_time = next_time = time.perf_counter()
8686
last_sample_time = start_time
8787
realtime_update_interval = 1.0 # Update every second
8888
last_realtime_update = start_time
8989
try:
90-
while running_time < duration_sec:
90+
while duration_sec is None or running_time_sec < duration_sec:
9191
# Check if live collector wants to stop
9292
if hasattr(collector, 'running') and not collector.running:
9393
break
@@ -104,7 +104,7 @@ def sample(self, collector, duration_sec=10, *, async_aware=False):
104104
stack_frames = self.unwinder.get_stack_trace()
105105
collector.collect(stack_frames)
106106
except ProcessLookupError as e:
107-
duration_sec = current_time - start_time
107+
running_time_sec = current_time - start_time
108108
break
109109
except (RuntimeError, UnicodeDecodeError, MemoryError, OSError):
110110
collector.collect_failed_sample()
@@ -135,25 +135,25 @@ def sample(self, collector, duration_sec=10, *, async_aware=False):
135135
num_samples += 1
136136
next_time += sample_interval_sec
137137

138-
running_time = time.perf_counter() - start_time
138+
running_time_sec = time.perf_counter() - start_time
139139
except KeyboardInterrupt:
140140
interrupted = True
141-
running_time = time.perf_counter() - start_time
141+
running_time_sec = time.perf_counter() - start_time
142142
print("Interrupted by user.")
143143

144144
# Clear real-time stats line if it was being displayed
145145
if self.realtime_stats and len(self.sample_intervals) > 0:
146146
print() # Add newline after real-time stats
147147

148-
sample_rate = num_samples / running_time if running_time > 0 else 0
148+
sample_rate = num_samples / running_time_sec if running_time_sec > 0 else 0
149149
error_rate = (errors / num_samples) * 100 if num_samples > 0 else 0
150-
expected_samples = int(duration_sec / sample_interval_sec)
150+
expected_samples = int(running_time_sec / sample_interval_sec)
151151
missed_samples = (expected_samples - num_samples) / expected_samples * 100 if expected_samples > 0 else 0
152152

153153
# Don't print stats for live mode (curses is handling display)
154154
is_live_mode = LiveStatsCollector is not None and isinstance(collector, LiveStatsCollector)
155155
if not is_live_mode:
156-
print(f"Captured {num_samples:n} samples in {fmt(running_time, 2)} seconds")
156+
print(f"Captured {num_samples:n} samples in {fmt(running_time_sec, 2)} seconds")
157157
print(f"Sample rate: {fmt(sample_rate, 2)} samples/sec")
158158
print(f"Error rate: {fmt(error_rate, 2)}")
159159

@@ -166,7 +166,7 @@ def sample(self, collector, duration_sec=10, *, async_aware=False):
166166

167167
# Pass stats to flamegraph collector if it's the right type
168168
if hasattr(collector, 'set_stats'):
169-
collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, missed_samples, mode=self.mode)
169+
collector.set_stats(self.sample_interval_usec, running_time_sec, sample_rate, error_rate, missed_samples, mode=self.mode)
170170

171171
if num_samples < expected_samples and not is_live_mode and not interrupted:
172172
print(
@@ -363,7 +363,7 @@ def sample(
363363
pid,
364364
collector,
365365
*,
366-
duration_sec=10,
366+
duration_sec=None,
367367
all_threads=False,
368368
realtime_stats=False,
369369
mode=PROFILING_MODE_WALL,
@@ -378,7 +378,8 @@ def sample(
378378
Args:
379379
pid: Process ID to sample
380380
collector: Collector instance to use for gathering samples
381-
duration_sec: How long to sample for (seconds)
381+
duration_sec: How long to sample for (seconds), or None to run until
382+
the process exits or interrupted
382383
all_threads: Whether to sample all threads
383384
realtime_stats: Whether to print real-time sampling statistics
384385
mode: Profiling mode - WALL (all samples), CPU (only when on CPU),
@@ -427,7 +428,7 @@ def sample_live(
427428
pid,
428429
collector,
429430
*,
430-
duration_sec=10,
431+
duration_sec=None,
431432
all_threads=False,
432433
realtime_stats=False,
433434
mode=PROFILING_MODE_WALL,

0 commit comments

Comments
 (0)