Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions codeflash/result/critic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MIN_TESTCASE_PASSED_THRESHOLD,
MIN_THROUGHPUT_IMPROVEMENT_THRESHOLD,
)
from codeflash.languages.current import is_javascript
from codeflash.models.test_type import TestType

if TYPE_CHECKING:
Expand All @@ -24,6 +25,23 @@ class AcceptanceReason(Enum):
NONE = "none"


# Factor by which compute_noise_floor scales the noise floor when
# is_javascript() reports that the current language is JavaScript.
JS_NOISE_MULTIPLIER = 3


def compute_noise_floor(original_code_runtime: int, *, disable_gh_action_noise: bool = False) -> float:
    """Return the minimum performance gain that counts as a real speedup.

    The base value is ``MIN_IMPROVEMENT_THRESHOLD``, tripled when the original
    runtime is below 10000. It is then scaled by ``JS_NOISE_MULTIPLIER`` when
    the current language is JavaScript/TypeScript, because separate V8
    processes have significant JIT/GC variance that creates false positive
    speedups. Finally it is doubled when running in CI, unless that
    adjustment is disabled.

    Args:
        original_code_runtime: Runtime of the original code; values under
            10000 select the stricter (tripled) base threshold.
        disable_gh_action_noise: When True, skip the CI doubling entirely
            (the CI check is not even performed).

    Returns:
        The noise floor that a performance gain must exceed.
    """
    # Very short runtimes get a stricter base threshold.
    if original_code_runtime < 10000:
        threshold = 3 * MIN_IMPROVEMENT_THRESHOLD
    else:
        threshold = MIN_IMPROVEMENT_THRESHOLD

    if is_javascript():
        threshold = threshold * JS_NOISE_MULTIPLIER

    # Guard clause keeps the original short-circuit: is_ci() is only
    # consulted when the CI adjustment has not been disabled.
    if disable_gh_action_noise:
        return threshold
    if env_utils.is_ci():
        threshold = threshold * 2
    return threshold


def performance_gain(*, original_runtime_ns: int, optimized_runtime_ns: int) -> float:
"""Calculate the performance gain of an optimized code over the original code.

Expand Down Expand Up @@ -91,9 +109,7 @@ def speedup_critic(
- Concurrency improvements detect when blocking calls are replaced with non-blocking equivalents
"""
# Runtime performance evaluation
noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_code_runtime < 10000 else MIN_IMPROVEMENT_THRESHOLD
if not disable_gh_action_noise and env_utils.is_ci():
noise_floor = noise_floor * 2 # Increase the noise floor in GitHub Actions mode
noise_floor = compute_noise_floor(original_code_runtime, disable_gh_action_noise=disable_gh_action_noise)

perf_gain = performance_gain(
original_runtime_ns=original_code_runtime, optimized_runtime_ns=candidate_result.best_test_runtime
Expand Down Expand Up @@ -151,9 +167,7 @@ def get_acceptance_reason(
Returns the primary reason for acceptance, with priority:
concurrency > throughput > runtime (for async code).
"""
noise_floor = 3 * MIN_IMPROVEMENT_THRESHOLD if original_runtime_ns < 10000 else MIN_IMPROVEMENT_THRESHOLD
if env_utils.is_ci():
noise_floor = noise_floor * 2
noise_floor = compute_noise_floor(original_runtime_ns)

perf_gain = performance_gain(original_runtime_ns=original_runtime_ns, optimized_runtime_ns=optimized_runtime_ns)
runtime_improved = perf_gain > noise_floor
Expand Down
55 changes: 55 additions & 0 deletions tests/test_critic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from pathlib import Path
from unittest.mock import Mock

import pytest

from codeflash.code_utils.env_utils import get_pr_number
from codeflash.models.models import (
CodeOptimizationContext,
Expand All @@ -15,7 +17,9 @@
TestResults,
TestType,
)
from codeflash.languages.current import reset_current_language, set_current_language
from codeflash.result.critic import (
compute_noise_floor,
concurrency_gain,
coverage_critic,
performance_gain,
Expand Down Expand Up @@ -799,3 +803,54 @@ def test_parse_concurrency_metrics() -> None:
metrics_no_class = parse_concurrency_metrics(test_results_no_class, "my_function")
assert metrics_no_class is not None
assert metrics_no_class.concurrency_ratio == 2.0 # 5000000 / 2500000


def test_compute_noise_floor_python() -> None:
    """With the language reset, expect 0.05 at 100_000 and 0.15 at 9_999 (CI doubling disabled)."""
    reset_current_language()

    slow_function_floor = compute_noise_floor(100_000, disable_gh_action_noise=True)
    assert slow_function_floor == pytest.approx(0.05)

    fast_function_floor = compute_noise_floor(9_999, disable_gh_action_noise=True)
    assert fast_function_floor == pytest.approx(0.15)


def test_compute_noise_floor_javascript() -> None:
    """With JavaScript selected, expect 0.15 at 100_000 and 0.45 at 9_999 (CI doubling disabled)."""
    expectations = (
        (100_000, 0.15),
        (9_999, 0.45),
    )
    set_current_language("javascript")
    try:
        for runtime, expected_floor in expectations:
            assert compute_noise_floor(runtime, disable_gh_action_noise=True) == pytest.approx(expected_floor)
    finally:
        # Always restore the language so later tests are unaffected.
        reset_current_language()


def test_compute_noise_floor_typescript() -> None:
    """With TypeScript selected, the floor at 100_000 matches the JavaScript value of 0.15."""
    set_current_language("typescript")
    try:
        typescript_floor = compute_noise_floor(100_000, disable_gh_action_noise=True)
        assert typescript_floor == pytest.approx(0.15)
    finally:
        # Always restore the language so later tests are unaffected.
        reset_current_language()


def test_speedup_critic_rejects_js_false_positive() -> None:
    """The same ~10.5% speedup is accepted with the language reset but rejected for JavaScript."""
    baseline_runtime = 100_000  # 100μs — above the 10μs fast-function threshold
    improved_runtime = 90_500  # ~10.5% improvement

    candidate = OptimizedCandidateResult(
        max_loop_count=5,
        best_test_runtime=improved_runtime,
        behavior_test_results=TestResults(),
        benchmarking_test_results=TestResults(),
        optimization_candidate_index=0,
        total_candidate_timing=12,
    )

    # Python: 10.5% > 5% noise floor → accepted
    reset_current_language()
    accepted_for_python = speedup_critic(candidate, baseline_runtime, None, disable_gh_action_noise=True)
    assert accepted_for_python

    # JavaScript: 10.5% < 15% noise floor → rejected
    set_current_language("javascript")
    try:
        accepted_for_javascript = speedup_critic(candidate, baseline_runtime, None, disable_gh_action_noise=True)
        assert not accepted_for_javascript
    finally:
        # Always restore the language so later tests are unaffected.
        reset_current_language()
Loading