Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions codeflash/code_utils/config_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
# setting this value to 1 will disable repair if there is at least one correct candidate
MIN_CORRECT_CANDIDATES = 2

# Skip benchmarking if candidate behavioral runtime exceeds this multiple of baseline behavioral runtime.
# The gate in run_optimized_candidate uses a strict ">" comparison and only applies when both
# the baseline and the candidate behavioral runtimes are greater than zero.
BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD = 10.0

try:
from codeflash.lsp.helpers import is_LSP_enabled

Expand Down
20 changes: 20 additions & 0 deletions codeflash/optimization/function_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
unified_diff_strings,
)
from codeflash.code_utils.config_consts import (
BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD,
COVERAGE_THRESHOLD,
INDIVIDUAL_TESTCASE_TIMEOUT,
MIN_CORRECT_CANDIDATES,
Expand Down Expand Up @@ -2529,6 +2530,25 @@ def run_optimized_candidate(
)
return self.get_results_not_matched_error()

candidate_behavior_runtime = candidate_behavior_results.total_passed_runtime()
baseline_behavior_runtime = baseline_results.behavior_test_results.total_passed_runtime()
if (
baseline_behavior_runtime > 0
and candidate_behavior_runtime > 0
and candidate_behavior_runtime > baseline_behavior_runtime * BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD
):
slowdown = candidate_behavior_runtime / baseline_behavior_runtime
logger.info(
"h4|Candidate %d is %.1fx slower than baseline in behavioral tests, skipping benchmarking ⏭️",
optimization_candidate_index,
slowdown,
)
console.rule()
return Failure(
f"Candidate behavioral runtime ({candidate_behavior_runtime}ns) "
f"exceeds {BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD:.0f}x baseline ({baseline_behavior_runtime}ns)."
)

logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...")
console.rule()

Expand Down
Empty file added tests/optimization/__init__.py
Empty file.
189 changes: 189 additions & 0 deletions tests/optimization/test_behavioral_timing_gate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock, patch

from codeflash.code_utils.config_consts import BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD
from codeflash.either import Failure, is_successful
from codeflash.models.models import FunctionTestInvocation, InvocationId, OriginalCodeBaseline, TestResults
from codeflash.models.test_type import TestType

MOCK_FILE_PATH = Path("tests/test_example.py")


def _make_test_results(runtime_ns: int) -> TestResults:
    """Build a ``TestResults`` holding a single passing pytest invocation.

    Args:
        runtime_ns: Value stored in the invocation's ``runtime`` field.

    Returns:
        A newly created ``TestResults`` to which exactly one invocation was added.
    """
    # Fixed identity for the fake invocation; only the runtime varies between tests.
    invocation_id = InvocationId(
        test_module_path="tests.test_example",
        test_class_name=None,
        test_function_name="test_func",
        function_getting_tested="func",
        iteration_id="0",
    )
    single_result = TestResults()
    single_result.add(
        FunctionTestInvocation(
            loop_index=1,
            id=invocation_id,
            file_name=MOCK_FILE_PATH,
            did_pass=True,
            runtime=runtime_ns,
            test_framework="pytest",
            test_type=TestType.EXISTING_UNIT_TEST,
            return_value=None,
            timed_out=False,
        )
    )
    return single_result


def _make_baseline(behavior_runtime_ns: int) -> OriginalCodeBaseline:
    """Build an ``OriginalCodeBaseline`` whose behavioral results carry the given runtime.

    Args:
        behavior_runtime_ns: Runtime used both for the single behavioral invocation
            and for the baseline's ``runtime`` field.

    Returns:
        A baseline with one behavioral invocation, a newly created (unpopulated)
        benchmarking ``TestResults``, no line-profile data and no coverage results.
    """
    behavior_results = _make_test_results(behavior_runtime_ns)
    benchmarking_results = TestResults()
    return OriginalCodeBaseline(
        behavior_test_results=behavior_results,
        benchmarking_test_results=benchmarking_results,
        line_profile_results={},
        runtime=behavior_runtime_ns,
        coverage_results=None,
    )


def _make_optimizer_mock(mock_run_and_parse: MagicMock, mock_compare_results: MagicMock, **kwargs: object) -> MagicMock:
    """Create a ``MagicMock`` that stands in for the optimizer instance (``self``).

    Args:
        mock_run_and_parse: Mock installed as ``run_and_parse_tests``.
        mock_compare_results: Mock installed as ``compare_candidate_results``.
        **kwargs: Extra attributes to set directly on the mock. Keys containing a
            ``"."`` are ignored; set nested attributes on the returned mock instead.

    Returns:
        The configured ``MagicMock``.
    """
    optimizer = MagicMock()
    optimizer.function_to_optimize.file_path = MOCK_FILE_PATH
    optimizer.function_to_optimize.is_async = False
    optimizer.test_files = []
    optimizer.run_and_parse_tests = mock_run_and_parse
    optimizer.compare_candidate_results = mock_compare_results
    for key, value in kwargs.items():
        if "." in key:
            # Dotted names are not resolved into nested attributes here; skip them.
            continue
        setattr(optimizer, key, value)
    return optimizer


def _run_candidate(optimizer: MagicMock, baseline: OriginalCodeBaseline) -> Failure | object:
    """Call ``FunctionOptimizer.run_optimized_candidate`` with ``optimizer`` as ``self``.

    ``Path.read_text`` is patched for the duration of the call so it returns a stub
    function source.

    Args:
        optimizer: Mock passed as the ``self`` argument of the unbound method.
        baseline: Baseline results passed as ``baseline_results``.

    Returns:
        Whatever ``run_optimized_candidate`` returns.
    """
    # Function-scope import: the optimizer module is only loaded when a test runs.
    from codeflash.optimization.function_optimizer import FunctionOptimizer

    call_kwargs = {
        "optimization_candidate_index": 1,
        "baseline_results": baseline,
        "original_helper_code": {},
        "file_path_to_helper_classes": {},
        "eval_ctx": MagicMock(),
        "code_context": MagicMock(),
        "candidate": MagicMock(),
        "exp_type": "test",
    }
    read_text_stub = patch.object(Path, "read_text", return_value="def func(): pass")
    with read_text_stub:
        return FunctionOptimizer.run_optimized_candidate(optimizer, **call_kwargs)


class TestBehavioralTimingGate:
    """Tests for the behavioral slowdown gate in ``FunctionOptimizer.run_optimized_candidate``.

    Each test calls ``run_optimized_candidate`` with a ``MagicMock`` standing in for the
    optimizer. The number of ``run_and_parse_tests`` calls shows whether a second
    (benchmarking) run followed the behavioral run: one call means the candidate was
    rejected early, two calls mean it went on to benchmarking.

    ``@patch`` decorators are applied bottom-up, so the mock arguments of each test are
    listed in the reverse order of the decorators.
    """

    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.run_and_parse_tests")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.compare_candidate_results")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.write_code_and_helpers")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.instrument_capture")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.get_test_env")
    @patch("codeflash.optimization.function_optimizer.get_run_tmp_file")
    def test_slow_candidate_skips_benchmarking(
        self,
        mock_get_run_tmp_file: MagicMock,
        mock_get_test_env: MagicMock,
        mock_instrument_capture: MagicMock,
        mock_write_code_and_helpers: MagicMock,
        mock_compare_results: MagicMock,
        mock_run_and_parse: MagicMock,
    ) -> None:
        """A candidate 15x slower than baseline should be rejected without benchmarking."""
        baseline = _make_baseline(1000)
        candidate_results = _make_test_results(15000)  # 15x slower

        mock_get_run_tmp_file.return_value = MagicMock()
        mock_get_test_env.return_value = {}
        mock_run_and_parse.return_value = (candidate_results, None)
        mock_compare_results.return_value = (True, [])

        optimizer = _make_optimizer_mock(mock_run_and_parse, mock_compare_results)
        optimizer.write_code_and_helpers = mock_write_code_and_helpers
        optimizer.instrument_capture = mock_instrument_capture
        optimizer.get_test_env = mock_get_test_env

        result = _run_candidate(optimizer, baseline)

        assert not is_successful(result)
        assert isinstance(result, Failure)
        assert f"{BEHAVIORAL_SLOWDOWN_SKIP_THRESHOLD:.0f}x" in str(result.value)
        # Only the behavioral run happened; the benchmarking run was skipped.
        assert mock_run_and_parse.call_count == 1

    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.run_and_parse_tests")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.compare_candidate_results")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.write_code_and_helpers")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.instrument_capture")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.get_test_env")
    @patch("codeflash.optimization.function_optimizer.get_run_tmp_file")
    def test_acceptable_candidate_proceeds_to_benchmarking(
        self,
        mock_get_run_tmp_file: MagicMock,
        mock_get_test_env: MagicMock,
        mock_instrument_capture: MagicMock,
        mock_write_code_and_helpers: MagicMock,
        mock_compare_results: MagicMock,
        mock_run_and_parse: MagicMock,
    ) -> None:
        """A candidate 5x slower than baseline (under threshold) should proceed to benchmarking."""
        baseline = _make_baseline(1000)
        candidate_behavior_results = _make_test_results(5000)  # 5x slower, under 10x threshold
        candidate_benchmark_results = _make_test_results(5000)

        mock_get_run_tmp_file.return_value = MagicMock()
        mock_get_test_env.return_value = {}
        # First call serves the behavioral run, second call the benchmarking run.
        mock_run_and_parse.side_effect = [(candidate_behavior_results, None), (candidate_benchmark_results, None)]
        mock_compare_results.return_value = (True, [])

        optimizer = _make_optimizer_mock(mock_run_and_parse, mock_compare_results)
        optimizer.write_code_and_helpers = mock_write_code_and_helpers
        optimizer.instrument_capture = mock_instrument_capture
        optimizer.get_test_env = mock_get_test_env
        optimizer.args.benchmark = False
        optimizer.collect_async_metrics.return_value = (None, None)

        # The return value is not inspected: this test only checks that the gate let the
        # candidate through to the second (benchmarking) run.
        _run_candidate(optimizer, baseline)

        assert mock_run_and_parse.call_count == 2

    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.run_and_parse_tests")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.compare_candidate_results")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.write_code_and_helpers")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.instrument_capture")
    @patch("codeflash.optimization.function_optimizer.FunctionOptimizer.get_test_env")
    @patch("codeflash.optimization.function_optimizer.get_run_tmp_file")
    def test_zero_baseline_runtime_skips_check(
        self,
        mock_get_run_tmp_file: MagicMock,
        mock_get_test_env: MagicMock,
        mock_instrument_capture: MagicMock,
        mock_write_code_and_helpers: MagicMock,
        mock_compare_results: MagicMock,
        mock_run_and_parse: MagicMock,
    ) -> None:
        """When baseline behavioral runtime is 0, the timing gate should be skipped."""
        baseline = OriginalCodeBaseline(
            behavior_test_results=TestResults(),  # Empty = 0 runtime
            benchmarking_test_results=TestResults(),
            line_profile_results={},
            runtime=0,
            coverage_results=None,
        )
        candidate_behavior_results = _make_test_results(50000)
        candidate_benchmark_results = _make_test_results(50000)

        mock_get_run_tmp_file.return_value = MagicMock()
        mock_get_test_env.return_value = {}
        # First call serves the behavioral run, second call the benchmarking run.
        mock_run_and_parse.side_effect = [(candidate_behavior_results, None), (candidate_benchmark_results, None)]
        mock_compare_results.return_value = (True, [])

        optimizer = _make_optimizer_mock(mock_run_and_parse, mock_compare_results)
        optimizer.write_code_and_helpers = mock_write_code_and_helpers
        optimizer.instrument_capture = mock_instrument_capture
        optimizer.get_test_env = mock_get_test_env
        optimizer.args.benchmark = False
        optimizer.collect_async_metrics.return_value = (None, None)

        # The return value is not inspected: this test only checks that a zero baseline
        # does not trigger the gate, so the second (benchmarking) run still happens.
        _run_candidate(optimizer, baseline)

        assert mock_run_and_parse.call_count == 2
Loading