Skip to content

Commit 8a9c031

Browse files
SL-Mar and claude committed
Add two-stage code generation: framework stubs then mathematical core
Local coding LLMs (14-24B params) cannot simultaneously produce correct QC framework code AND implement novel mathematical models. This commit splits code generation into two focused LLM calls:

- Stage 1 generates the QC framework with stub methods (`pass` bodies) for novel math.
- Stage 2 gets a narrower task: implement ONLY the stub methods, given the mathematical specification.

Fallbacks:

- Stage 1 failure → single-shot generate_qc_code().
- Stage 2 failure → keep the Stage 1 framework (it compiles; stubs are valid Python).
- No stubs detected → skip Stage 2 entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 0a17568 commit 8a9c031

4 files changed

Lines changed: 540 additions & 9 deletions

File tree

quantcoder/core/llm.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,149 @@ def generate_qc_code(self, summary: str) -> Optional[str]:
370370

371371
return self._strip_markdown(code)
372372

373+
def generate_qc_framework(self, summary: str) -> Optional[str]:
    """Stage 1 — Generate QC algorithm with stub methods for novel math.

    Asks the coding LLM for an algorithm in which the standard QuantConnect
    framework code is fully implemented, while novel mathematical models /
    custom indicators are left as method stubs (signature + docstring +
    ``pass``). Whether the model actually honours that request is not
    checked here.

    Args:
        summary: Strategy summary text to convert into an algorithm.

    Returns:
        Code string (markdown fences stripped), or None when the LLM call
        raises or yields an empty response.
    """
    self.logger.info("Stage 1: Generating QC framework with stubs")

    system = (
        "You are an expert QuantConnect algorithm developer. You write "
        "production-quality LEAN Python algorithms.\n\n"
        "CRITICAL RULES:\n"
        "1. ALWAYS start with: from AlgorithmImports import *\n"
        "2. Class must inherit from QCAlgorithm\n"
        "3. Use snake_case methods: self.set_start_date(), self.set_cash(), "
        "self.add_equity(), etc.\n"
        "4. Register indicators via self methods, NOT standalone constructors\n"
        "5. Always check indicator.is_ready before using .current.value\n"
        "6. Use self.set_holdings() for position sizing or self.market_order() "
        "for discrete orders\n"
        "7. NEVER invent indicators or classes that don't exist in QuantConnect\n"
        "8. Return ONLY Python code, no markdown, no explanations\n\n"
        "INDICATOR SIGNATURES (these are EXACT - do NOT omit parameters):\n"
        "- self.sma(symbol, period, resolution) -> 3 args\n"
        "- self.ema(symbol, period, resolution) -> 3 args\n"
        "- self.rsi(symbol, period, moving_average_type, resolution) -> 4 args\n"
        "- self.atr(symbol, period, moving_average_type, resolution) -> 4 args\n"
        "- self.macd(symbol, fast_period, slow_period, signal_period, "
        "moving_average_type, resolution) -> 6 args\n"
        "- self.bb(symbol, period, k, moving_average_type, resolution) -> 5 args\n"
        "- self.momp(symbol, period, resolution) -> 3 args\n"
        "- self.adx(symbol, period, resolution) -> 3 args\n\n"
        "STUB METHODS RULE:\n"
        "For any mathematical model, custom indicator, or non-standard "
        "calculation (e.g., Ornstein-Uhlenbeck process, HMM, regime-switching, "
        "jump-diffusion, custom scoring), create a METHOD STUB:\n"
        "- Define the method with its full signature\n"
        "- Add a docstring describing WHAT to compute and the expected "
        "return value\n"
        "- Use `pass` as the only body statement\n"
        "Standard QC indicators (RSI, SMA, EMA, MACD, etc.) should be used "
        "directly — only create stubs for novel/custom calculations.\n"
        "The algorithm MUST be compilable even with pass-only stubs."
    )

    prompt = (
        "Convert this trading strategy into a complete QuantConnect Python "
        f"algorithm:\n\n{summary}\n\n"
        "IMPORTANT: All framework code (initialize, on_data, scheduling, "
        "position management) must be fully implemented. Any novel "
        "mathematical model or custom calculation should be a method stub "
        "with a descriptive docstring and `pass` as the body."
    )

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    try:
        code = _run_async(
            self._code_llm.chat(
                messages=messages,
                max_tokens=self.max_tokens,
                temperature=0.3,
            )
        )
        if not code:
            # An empty/None reply is a failure, not a result: report it as
            # such instead of handing it to _strip_markdown and logging
            # a misleading "generated" message.
            self.logger.error(
                "Stage 1 (generate_qc_framework) returned an empty response"
            )
            return None
        self.logger.info(
            f"Stage 1 framework generated with {self._code_llm.get_model_name()}"
        )
    except Exception as e:
        # Deliberately broad: any provider failure is mapped to None, the
        # documented failure value of this method.
        self.logger.error(f"Stage 1 (generate_qc_framework) failed: {e}")
        return None

    return self._strip_markdown(code)
451+
452+
def fill_mathematical_core(
    self, summary: str, framework_code: str
) -> Optional[str]:
    """Stage 2 — Fill stub methods with mathematical implementations.

    Sends the Stage 1 algorithm (novel math left as ``pass`` stubs) back to
    the coding LLM together with the strategy summary, and asks it to
    implement ONLY the stub methods. The model is instructed to leave the
    framework code (initialize, on_data, scheduling) untouched, but this
    method does not verify that it did.

    Args:
        summary: Strategy summary, supplied as mathematical context.
        framework_code: Algorithm source whose stub methods should be filled.

    Returns:
        Complete algorithm code as returned by the LLM (markdown fences
        stripped), or None when the LLM call raises or yields an empty
        response.
    """
    self.logger.info("Stage 2: Filling mathematical core in stub methods")

    system = (
        "You are given a QuantConnect algorithm with placeholder methods "
        "(pass bodies). Your ONLY job is to implement the stub methods.\n\n"
        "RULES:\n"
        "1. Do NOT modify initialize(), on_data(), scheduling, or position "
        "management code.\n"
        "2. Implement ONLY the methods whose body is currently `pass`.\n"
        "3. Follow each stub's docstring precisely — it describes what to "
        "compute.\n"
        "4. Do NOT substitute standard indicators (RSI, SMA, EMA) for the "
        "model described in the docstring.\n"
        "5. You may import numpy as np at the top of the file.\n"
        "6. Use self.history() to get price history as a DataFrame when "
        "needed.\n"
        "7. Use RollingWindow[float] or plain lists to maintain state "
        "across calls.\n"
        "8. Return the COMPLETE algorithm (framework + filled methods), "
        "not just the methods.\n"
        "9. Return ONLY Python code, no markdown, no explanations."
    )

    prompt = (
        f"STRATEGY SPECIFICATION (for mathematical context):\n{summary}\n\n"
        f"ALGORITHM WITH STUB METHODS:\n{framework_code}\n\n"
        "Implement ONLY the stub methods (those with `pass` as their body). "
        "Return the complete algorithm with the stubs filled in."
    )

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]

    try:
        code = _run_async(
            self._code_llm.chat(
                messages=messages,
                max_tokens=self.max_tokens,
                temperature=0.3,
            )
        )
        if not code:
            # An empty/None reply is a failure, not a result: report it as
            # such instead of handing it to _strip_markdown and logging
            # a misleading "filled" message.
            self.logger.error(
                "Stage 2 (fill_mathematical_core) returned an empty response"
            )
            return None
        self.logger.info(
            f"Stage 2 math core filled with {self._code_llm.get_model_name()}"
        )
    except Exception as e:
        # Deliberately broad: any provider failure is mapped to None, the
        # documented failure value of this method.
        self.logger.error(f"Stage 2 (fill_mathematical_core) failed: {e}")
        return None

    return self._strip_markdown(code)
515+
373516
def refine_code(self, code: str) -> Optional[str]:
374517
"""Fix errors in generated QuantConnect code."""
375518
self.logger.info("Refining generated code")

quantcoder/core/processor.py

Lines changed: 105 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -356,9 +356,14 @@ def extract_structure_and_generate_code(self, pdf_path: str) -> Dict:
356356
def generate_code_from_summary(self, summary_text: str) -> Optional[str]:
357357
"""Generate QuantConnect code from a pre-existing summary.
358358
359-
Includes a fidelity assessment loop: after the code passes syntax
360-
validation, mistral evaluates whether it faithfully implements the
361-
summary. If not, qwen regenerates with structured critique.
359+
Uses a two-stage pipeline:
360+
Stage 1 — generate QC framework with method stubs for novel math.
361+
Stage 2 — fill stub methods with mathematical implementations.
362+
363+
Falls back to single-shot ``generate_qc_code()`` if Stage 1 fails.
364+
Falls back to the Stage 1 framework if Stage 2 fails syntax checks.
365+
366+
After both stages, runs the fidelity assessment loop (unchanged).
362367
363368
Args:
364369
summary_text: The strategy summary text
@@ -372,9 +377,15 @@ def generate_code_from_summary(self, summary_text: str) -> Optional[str]:
372377
self.logger.error("Empty summary provided")
373378
return None
374379

375-
# -- Phase 1: generate + syntax validation ---------------------------
376-
qc_code = self.llm_handler.generate_qc_code(summary_text)
380+
# -- Phase 1: two-stage code generation + syntax validation -----------
381+
382+
# Stage 1: framework with stubs (fall back to single-shot)
383+
qc_code = self.llm_handler.generate_qc_framework(summary_text)
384+
if not qc_code:
385+
self.logger.warning("Stage 1 failed, falling back to single-shot generate_qc_code")
386+
qc_code = self.llm_handler.generate_qc_code(summary_text)
377387

388+
# Syntax validation loop on Stage 1 output
378389
attempt = 0
379390
while qc_code and not self._validate_code(qc_code) and attempt < self.max_refine_attempts:
380391
self.logger.info(f"Syntax refine attempt {attempt + 1}")
@@ -388,7 +399,48 @@ def generate_code_from_summary(self, summary_text: str) -> Optional[str]:
388399
self.logger.error("Failed to generate syntactically valid code")
389400
return "QuantConnect code could not be generated successfully."
390401

391-
# -- Phase 2: fidelity assessment loop --------------------------------
402+
# Stage 2: fill mathematical core (only if stubs detected)
403+
framework_code = qc_code # save as fallback anchor
404+
if self._has_stub_methods(framework_code):
405+
self.logger.info("Stubs detected — running Stage 2 (fill mathematical core)")
406+
filled_code = self.llm_handler.fill_mathematical_core(
407+
summary_text, framework_code
408+
)
409+
410+
if filled_code:
411+
# Syntax validation loop on Stage 2 output
412+
s2_attempt = 0
413+
while (
414+
not self._validate_code(filled_code)
415+
and s2_attempt < self.max_refine_attempts
416+
):
417+
self.logger.info(
418+
f"Stage 2 syntax refine attempt {s2_attempt + 1}"
419+
)
420+
refined = self.llm_handler.refine_code(filled_code)
421+
if refined and self._validate_code(refined):
422+
filled_code = refined
423+
break
424+
elif refined:
425+
filled_code = refined
426+
s2_attempt += 1
427+
428+
if self._validate_code(filled_code):
429+
self.logger.info("Stage 2 code is syntactically valid")
430+
qc_code = filled_code
431+
else:
432+
self.logger.warning(
433+
"Stage 2 code failed syntax validation — "
434+
"keeping Stage 1 framework"
435+
)
436+
else:
437+
self.logger.warning(
438+
"Stage 2 returned no code — keeping Stage 1 framework"
439+
)
440+
else:
441+
self.logger.info("No stubs detected — skipping Stage 2")
442+
443+
# -- Phase 2: fidelity assessment loop (unchanged) --------------------
392444
for fidelity_attempt in range(self.max_fidelity_attempts):
393445
self.logger.info(f"Fidelity assessment attempt {fidelity_attempt + 1}")
394446

@@ -449,3 +501,50 @@ def _validate_code(self, code: str) -> bool:
449501
except Exception as e:
450502
self.logger.error(f"Validation error: {e}")
451503
return False
504+
505+
@staticmethod
def _has_stub_methods(code: str) -> bool:
    """Detect whether *code* contains method stubs (def + docstring + pass).

    A stub is a ``def`` / ``async def`` whose body consists of only an
    optional docstring followed by ``pass``.

    Detection is done on the parsed AST, so it is insensitive to layout:
    multi-line signatures, docstrings whose closing quotes share a line
    with text, and one-line ``def f(): pass`` definitions are all
    recognised. If *code* cannot be parsed, a line-based heuristic is used
    instead: scan for ``pass`` lines and look backward for a preceding
    ``def`` with only blank lines, comments, or docstring text between.

    False positives are harmless (Stage 2 preserves non-stub code).

    Args:
        code: Python source text to inspect; may be empty or None.

    Returns:
        True if at least one stub is found, otherwise False.
    """
    if not code:
        return False

    # Local import keeps this helper self-contained.
    import ast

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError covers sources ast refuses outright (e.g. null bytes).
        tree = None

    if tree is not None:
        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            body = node.body
            if ast.get_docstring(node, clean=False) is not None:
                # The docstring is the first statement; ignore it.
                body = body[1:]
            # What remains must be non-empty and consist solely of ``pass``.
            if body and all(isinstance(stmt, ast.Pass) for stmt in body):
                return True
        return False

    # Fallback for unparsable text: best-effort backward scan from each
    # bare ``pass`` line to the first "real" statement above it.
    lines = code.splitlines()
    for i, line in enumerate(lines):
        if line.strip() != "pass":
            continue
        in_docstring = False
        for prev in (earlier.strip() for earlier in reversed(lines[:i])):
            if prev.startswith(('"""', "'''")):
                # Two delimiters on one line form a complete one-line
                # docstring; a lone delimiter opens/closes a multi-line one.
                if prev.count('"""') != 2 and prev.count("'''") != 2:
                    in_docstring = not in_docstring
                continue
            if in_docstring or not prev or prev.startswith("#"):
                continue
            if prev.startswith("def ") and prev.endswith(":"):
                return True
            # Any other statement means this ``pass`` is not a stub body.
            break
    return False

tests/test_llm.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,101 @@ def test_malformed_response_returns_unfaithful(self, mock_config):
337337
assert result["score"] == 1
338338

339339

340+
class TestGenerateQCFramework:
    """Stage 1 coverage: ``LLMHandler.generate_qc_framework``."""

    def _make_handler(self, mock_config):
        """Build an ``LLMHandler`` while the provider factory is patched out."""
        with patch("quantcoder.core.llm.LLMFactory") as factory:
            provider = MagicMock()
            provider.get_model_name.return_value = "qwen2.5-coder:14b"
            provider.chat = AsyncMock(return_value="test")
            factory.create.return_value = provider
            return LLMHandler(mock_config)

    def test_returns_code_with_stubs(self, mock_config):
        """The framework code produced by the LLM is returned, stubs included."""
        llm_handler = self._make_handler(mock_config)
        framework = "\n".join(
            (
                "from AlgorithmImports import *",
                "class OUAlgo(QCAlgorithm):",
                " def initialize(self): pass",
                " def _compute_ou_signal(self, prices):",
                ' """Compute OU mean-reversion signal."""',
                " pass",
            )
        ) + "\n"

        with patch("quantcoder.core.llm._run_async", return_value=framework):
            output = llm_handler.generate_qc_framework("OU mean reversion strategy")

        assert output is not None
        assert "pass" in output
        assert "AlgorithmImports" in output

    def test_strips_markdown(self, mock_config):
        """No markdown code fence survives in the returned code."""
        llm_handler = self._make_handler(mock_config)
        fenced = "```python\ndef test(): pass\n```"

        with patch("quantcoder.core.llm._run_async", return_value=fenced):
            output = llm_handler.generate_qc_framework("strategy")

        assert "```" not in output

    def test_returns_none_on_failure(self, mock_config):
        """An exception raised by the LLM call yields ``None``."""
        llm_handler = self._make_handler(mock_config)

        with patch(
            "quantcoder.core.llm._run_async", side_effect=Exception("timeout")
        ):
            output = llm_handler.generate_qc_framework("strategy")

        assert output is None
386+
387+
388+
class TestFillMathematicalCore:
    """Stage 2 coverage: ``LLMHandler.fill_mathematical_core``."""

    def _make_handler(self, mock_config):
        """Build an ``LLMHandler`` while the provider factory is patched out."""
        with patch("quantcoder.core.llm.LLMFactory") as factory:
            provider = MagicMock()
            provider.get_model_name.return_value = "qwen2.5-coder:14b"
            provider.chat = AsyncMock(return_value="test")
            factory.create.return_value = provider
            return LLMHandler(mock_config)

    def test_returns_filled_code(self, mock_config):
        """The completed algorithm produced by the LLM is returned."""
        llm_handler = self._make_handler(mock_config)
        completed = "\n".join(
            (
                "from AlgorithmImports import *",
                "import numpy as np",
                "class OUAlgo(QCAlgorithm):",
                " def _compute_ou_signal(self, prices):",
                " log_prices = np.log(prices)",
                " return log_prices[-1] - np.mean(log_prices)",
            )
        ) + "\n"

        with patch("quantcoder.core.llm._run_async", return_value=completed):
            output = llm_handler.fill_mathematical_core(
                "OU strategy", "framework code"
            )

        assert output is not None
        assert "numpy" in output

    def test_strips_markdown(self, mock_config):
        """No markdown code fence survives in the returned code."""
        llm_handler = self._make_handler(mock_config)
        fenced = "```python\nimport numpy as np\n```"

        with patch("quantcoder.core.llm._run_async", return_value=fenced):
            output = llm_handler.fill_mathematical_core("summary", "framework")

        assert "```" not in output

    def test_returns_none_on_failure(self, mock_config):
        """An exception raised by the LLM call yields ``None``."""
        llm_handler = self._make_handler(mock_config)

        with patch(
            "quantcoder.core.llm._run_async", side_effect=Exception("timeout")
        ):
            output = llm_handler.fill_mathematical_core("summary", "framework")

        assert output is None
433+
434+
340435
class TestRegenerateWithCritique:
341436
"""Tests for LLMHandler.regenerate_with_critique."""
342437

0 commit comments

Comments
 (0)