4 changes: 4 additions & 0 deletions codeflash/api/aiservice.py
@@ -157,6 +157,7 @@ def optimize_code(
         n_candidates: int = 5,
         is_numerical_code: bool | None = None,
         rerun_trace_id: str | None = None,
+        concurrency_metrics: dict[str, float] | None = None,
     ) -> list[OptimizedCandidate]:
         """Optimize the given code for performance by making a request to the Django endpoint.
 
@@ -200,6 +201,9 @@ def optimize_code(
             "rerun_trace_id": rerun_trace_id,
         }
 
+        if concurrency_metrics is not None:
+            payload["concurrency_metrics"] = concurrency_metrics
+
         self.add_language_metadata(payload, language_version, module_system)
 
         # DEBUG: Print payload language field
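Reviewer note: a minimal sketch of the optional `concurrency_metrics` payload entry introduced above, assuming the three keys built in `function_optimizer.py` below; the example values and the surrounding payload field are illustrative, not the real request schema.

```python
# Illustrative only: shape of the optional payload entry added above.
# The three keys mirror pre_optimization_concurrency_metrics in
# function_optimizer.py; the values here are made-up examples.
payload: dict[str, object] = {"rerun_trace_id": None}

concurrency_metrics: dict[str, float] | None = {
    "concurrency_ratio": 3.2,           # sequential_time_ns / concurrent_time_ns
    "sequential_time_ns": 1_000_000.0,  # example timings in nanoseconds
    "concurrent_time_ns": 312_500.0,
}

# Same guard as the diff: the key is omitted entirely when no metrics exist.
if concurrency_metrics is not None:
    payload["concurrency_metrics"] = concurrency_metrics
```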
67 changes: 55 additions & 12 deletions codeflash/languages/function_optimizer.py
@@ -81,6 +81,7 @@
     AIServiceRefinerRequest,
     BestOptimization,
     CandidateEvaluationContext,
+    ConcurrencyMetrics,
     GeneratedTests,
     GeneratedTestsList,
     OptimizationReviewResult,
@@ -502,6 +503,7 @@ def __init__(
         self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None)
         self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None
         self.test_files = TestFiles(test_files=[])
+        self.cached_concurrency_metrics: ConcurrencyMetrics | None = None
 
         default_effort = getattr(args, "effort", EffortLevel.MEDIUM.value) if args else EffortLevel.MEDIUM.value
         self.effort = effort_override or default_effort
@@ -788,20 +790,53 @@ def optimize_function(self) -> Result[BestOptimization, str]:
         ):
             console.rule()
             new_code_context = code_context
-            # Generate tests and optimizations in parallel
-            future_tests = self.executor.submit(self.generate_and_instrument_tests, new_code_context)
-            future_optimizations = self.executor.submit(
-                self.generate_optimizations,
-                read_writable_code=code_context.read_writable_code,
-                read_only_context_code=code_context.read_only_context_code,
-                run_experiment=should_run_experiment,
-                is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
-            )
-
-            concurrent.futures.wait([future_tests, future_optimizations])
-
-            test_setup_result = future_tests.result()
-            optimization_result = future_optimizations.result()
+            if self.function_to_optimize.is_async:
+                future_tests = self.executor.submit(self.generate_and_instrument_tests, new_code_context)
+                concurrent.futures.wait([future_tests])
+                test_setup_result = future_tests.result()
+
+                pre_optimization_concurrency_metrics: dict[str, float] | None = None
+                if is_successful(test_setup_result) and self.test_files.test_files:
+                    test_env = self.get_test_env(
+                        codeflash_loop_index=0, codeflash_test_iteration=0, codeflash_tracer_disable=1
+                    )
+                    metrics = self.run_concurrency_benchmark(
+                        code_context=code_context, original_helper_code=original_helper_code, test_env=test_env
+                    )
+                    if metrics is not None:
+                        self.cached_concurrency_metrics = metrics
+                        pre_optimization_concurrency_metrics = {
+                            "concurrency_ratio": metrics.concurrency_ratio,
+                            "sequential_time_ns": float(metrics.sequential_time_ns),
+                            "concurrent_time_ns": float(metrics.concurrent_time_ns),
+                        }
+
+                future_optimizations = self.executor.submit(
+                    self.generate_optimizations,
+                    read_writable_code=code_context.read_writable_code,
+                    read_only_context_code=code_context.read_only_context_code,
+                    run_experiment=should_run_experiment,
+                    is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
+                    concurrency_metrics=pre_optimization_concurrency_metrics,
+                )
+                concurrent.futures.wait([future_optimizations])
+                optimization_result = future_optimizations.result()
+            else:
+                future_tests = self.executor.submit(self.generate_and_instrument_tests, new_code_context)
+                future_optimizations = self.executor.submit(
+                    self.generate_optimizations,
+                    read_writable_code=code_context.read_writable_code,
+                    read_only_context_code=code_context.read_only_context_code,
+                    run_experiment=should_run_experiment,
+                    is_numerical_code=self.is_numerical_code and not self.args.no_jit_opts,
+                )
+
+                concurrent.futures.wait([future_tests, future_optimizations])
+
+                test_setup_result = future_tests.result()
+                optimization_result = future_optimizations.result()
             console.rule()
 
             if not is_successful(test_setup_result):
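Reviewer note: the async path above is sequential on purpose: tests must be generated successfully before the concurrency benchmark can run them against the original code, and only then are the metrics fed into candidate generation. A self-contained sketch of how such a concurrency ratio can be measured (this is not codeflash's `run_concurrency_benchmark` implementation, just the idea behind the three metric keys):

```python
import asyncio
import time

# Stand-in for the async function under test.
async def work() -> None:
    await asyncio.sleep(0.01)

async def measure(n: int = 10) -> dict[str, float]:
    """Time n sequential awaits vs n concurrent ones and report the same
    three keys that the diff attaches to the optimize_code payload."""
    t0 = time.perf_counter_ns()
    for _ in range(n):
        await work()
    sequential_time_ns = float(time.perf_counter_ns() - t0)

    t0 = time.perf_counter_ns()
    await asyncio.gather(*(work() for _ in range(n)))
    concurrent_time_ns = float(time.perf_counter_ns() - t0)

    return {
        "concurrency_ratio": sequential_time_ns / concurrent_time_ns,
        "sequential_time_ns": sequential_time_ns,
        "concurrent_time_ns": concurrent_time_ns,
    }

if __name__ == "__main__":
    # An I/O-bound coroutine like this yields a ratio near n;
    # CPU-bound code would stay near 1.0.
    print(asyncio.run(measure()))
```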
@@ -1861,6 +1896,7 @@ def generate_optimizations(
         read_only_context_code: str,
         run_experiment: bool = False,
         is_numerical_code: bool | None = None,
+        concurrency_metrics: dict[str, float] | None = None,
     ) -> Result[tuple[OptimizationSet, str], str]:
         """Generate optimization candidates for the function. Backend handles multi-model diversity."""
         n_candidates = get_effort_value(EffortKeys.N_OPTIMIZER_CANDIDATES, self.effort)
@@ -1876,6 +1912,7 @@
             n_candidates=n_candidates,
             is_numerical_code=is_numerical_code,
             rerun_trace_id=self.rerun_trace_id,
+            concurrency_metrics=concurrency_metrics,
         )
 
         future_references = self.executor.submit(
@@ -1902,6 +1939,7 @@
                 language_version=self.language_support.language_version,
                 is_async=self.function_to_optimize.is_async,
                 n_candidates=n_candidates,
+                concurrency_metrics=concurrency_metrics,
             )
             futures.append(future_candidates_exp)
 
@@ -3291,6 +3329,11 @@ def run_concurrency_benchmark(
         if not self.function_to_optimize.is_async:
            return None
 
+        if self.cached_concurrency_metrics is not None:
+            cached = self.cached_concurrency_metrics
+            self.cached_concurrency_metrics = None
+            return cached
+
        from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function
 
        try:
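Reviewer note: the cache added above is consume-once: the pre-optimization run stores its result on `self.cached_concurrency_metrics`, and the next `run_concurrency_benchmark` call returns and clears it, so later calls re-measure instead of reusing stale numbers. A minimal sketch of the pattern (the `ConcurrencyMetrics` field names come from the diff; the class shape and the measuring stub are assumptions):

```python
from dataclasses import dataclass

@dataclass
class ConcurrencyMetrics:
    # Field names taken from the diff; the actual class may differ.
    concurrency_ratio: float
    sequential_time_ns: int
    concurrent_time_ns: int

class Benchmarker:
    def __init__(self) -> None:
        self.cached_concurrency_metrics: ConcurrencyMetrics | None = None

    def run_concurrency_benchmark(self) -> ConcurrencyMetrics:
        # Consume-once: hand back the cached result and clear it so a
        # later call (e.g. after an optimization lands) measures fresh.
        if self.cached_concurrency_metrics is not None:
            cached = self.cached_concurrency_metrics
            self.cached_concurrency_metrics = None
            return cached
        return self._measure()

    def _measure(self) -> ConcurrencyMetrics:
        # Placeholder measurement; the real implementation instruments
        # and runs the generated async tests.
        return ConcurrencyMetrics(1.0, 1_000, 1_000)
```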