Add two-stage code generation: framework stubs then mathematical core

SL-Mar · claude · SL-Mar · commit 8a9c031d6d3d · 2026-02-10T19:47:58.000+01:00
Local coding LLMs (14-24B params) cannot simultaneously produce correct
QC framework code AND implement novel mathematical models. This splits
code generation into two focused LLM calls:

Stage 1 generates the QC framework with stub methods (pass bodies) for
novel math. Stage 2 gets a narrower task: implement ONLY the stub
methods given the mathematical specification.

Fallbacks: Stage 1 failure → single-shot generate_qc_code(); Stage 2
failure → keep Stage 1 framework (compiles, stubs are valid Python);
no stubs detected → skip Stage 2 entirely.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/quantcoder/core/llm.py b/quantcoder/core/llm.py
@@ -370,6 +370,149 @@ def generate_qc_code(self, summary: str) -> Optional[str]:
 
         return self._strip_markdown(code)
 
+    def generate_qc_framework(self, summary: str) -> Optional[str]:
+        """Run Stage 1: ask the code LLM for a QC algorithm skeleton.
+
+        The prompts request fully written QuantConnect framework code while
+        novel mathematical models / custom indicators are left as method
+        stubs (signature + docstring + ``pass``) for Stage 2 to implement.
+
+        Returns:
+            The markdown-stripped response, or None if the call raised.
+        """
+        self.logger.info("Stage 1: Generating QC framework with stubs")
+
+        system_prompt = (
+            "You are an expert QuantConnect algorithm developer. You write "
+            "production-quality LEAN Python algorithms.\n\n"
+            "CRITICAL RULES:\n"
+            "1. ALWAYS start with: from AlgorithmImports import *\n"
+            "2. Class must inherit from QCAlgorithm\n"
+            "3. Use snake_case methods: self.set_start_date(), self.set_cash(), "
+            "self.add_equity(), etc.\n"
+            "4. Register indicators via self methods, NOT standalone constructors\n"
+            "5. Always check indicator.is_ready before using .current.value\n"
+            "6. Use self.set_holdings() for position sizing or self.market_order() "
+            "for discrete orders\n"
+            "7. NEVER invent indicators or classes that don't exist in QuantConnect\n"
+            "8. Return ONLY Python code, no markdown, no explanations\n\n"
+            "INDICATOR SIGNATURES (these are EXACT - do NOT omit parameters):\n"
+            "- self.sma(symbol, period, resolution) -> 3 args\n"
+            "- self.ema(symbol, period, resolution) -> 3 args\n"
+            "- self.rsi(symbol, period, moving_average_type, resolution) -> 4 args\n"
+            "- self.atr(symbol, period, moving_average_type, resolution) -> 4 args\n"
+            "- self.macd(symbol, fast_period, slow_period, signal_period, "
+            "moving_average_type, resolution) -> 6 args\n"
+            "- self.bb(symbol, period, k, moving_average_type, resolution) -> 5 args\n"
+            "- self.momp(symbol, period, resolution) -> 3 args\n"
+            "- self.adx(symbol, period, resolution) -> 3 args\n\n"
+            "STUB METHODS RULE:\n"
+            "For any mathematical model, custom indicator, or non-standard "
+            "calculation (e.g., Ornstein-Uhlenbeck process, HMM, regime-switching, "
+            "jump-diffusion, custom scoring), create a METHOD STUB:\n"
+            "- Define the method with its full signature\n"
+            "- Add a docstring describing WHAT to compute and the expected "
+            "return value\n"
+            "- Use `pass` as the only body statement\n"
+            "Standard QC indicators (RSI, SMA, EMA, MACD, etc.) should be used "
+            "directly — only create stubs for novel/custom calculations.\n"
+            "The algorithm MUST be compilable even with pass-only stubs."
+        )
+
+        user_prompt = (
+            "Convert this trading strategy into a complete QuantConnect Python "
+            f"algorithm:\n\n{summary}\n\n"
+            "IMPORTANT: All framework code (initialize, on_data, scheduling, "
+            "position management) must be fully implemented. Any novel "
+            "mathematical model or custom calculation should be a method stub "
+            "with a descriptive docstring and `pass` as the body."
+        )
+
+        chat_messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+        try:
+            raw_code = _run_async(
+                self._code_llm.chat(
+                    messages=chat_messages,
+                    max_tokens=self.max_tokens,
+                    temperature=0.3,
+                )
+            )
+            model_name = self._code_llm.get_model_name()
+            self.logger.info(f"Stage 1 framework generated with {model_name}")
+        except Exception as exc:
+            # Boundary catch: any failure is logged and surfaces as None.
+            self.logger.error(f"Stage 1 (generate_qc_framework) failed: {exc}")
+            return None
+
+        return self._strip_markdown(raw_code)
+
+    def fill_mathematical_core(
+        self, summary: str, framework_code: str
+    ) -> Optional[str]:
+        """Run Stage 2: ask the code LLM to implement the ``pass`` stubs.
+
+        Sends the strategy summary together with the Stage 1 framework and
+        requests the complete algorithm back with only the stub methods
+        implemented.  The instruction to leave framework code untouched is
+        part of the prompt; the response is not checked against it here.
+
+        Returns:
+            The markdown-stripped response, or None if the call raised.
+        """
+        self.logger.info("Stage 2: Filling mathematical core in stub methods")
+
+        system_prompt = (
+            "You are given a QuantConnect algorithm with placeholder methods "
+            "(pass bodies). Your ONLY job is to implement the stub methods.\n\n"
+            "RULES:\n"
+            "1. Do NOT modify initialize(), on_data(), scheduling, or position "
+            "management code.\n"
+            "2. Implement ONLY the methods whose body is currently `pass`.\n"
+            "3. Follow each stub's docstring precisely — it describes what to "
+            "compute.\n"
+            "4. Do NOT substitute standard indicators (RSI, SMA, EMA) for the "
+            "model described in the docstring.\n"
+            "5. You may import numpy as np at the top of the file.\n"
+            "6. Use self.history() to get price history as a DataFrame when "
+            "needed.\n"
+            "7. Use RollingWindow[float] or plain lists to maintain state "
+            "across calls.\n"
+            "8. Return the COMPLETE algorithm (framework + filled methods), "
+            "not just the methods.\n"
+            "9. Return ONLY Python code, no markdown, no explanations."
+        )
+
+        user_prompt = (
+            f"STRATEGY SPECIFICATION (for mathematical context):\n{summary}\n\n"
+            f"ALGORITHM WITH STUB METHODS:\n{framework_code}\n\n"
+            "Implement ONLY the stub methods (those with `pass` as their body). "
+            "Return the complete algorithm with the stubs filled in."
+        )
+
+        chat_messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+        try:
+            raw_code = _run_async(
+                self._code_llm.chat(
+                    messages=chat_messages,
+                    max_tokens=self.max_tokens,
+                    temperature=0.3,
+                )
+            )
+            model_name = self._code_llm.get_model_name()
+            self.logger.info(f"Stage 2 math core filled with {model_name}")
+        except Exception as exc:
+            # Boundary catch: any failure is logged and surfaces as None.
+            self.logger.error(f"Stage 2 (fill_mathematical_core) failed: {exc}")
+            return None
+
+        return self._strip_markdown(raw_code)
+
     def refine_code(self, code: str) -> Optional[str]:
         """Fix errors in generated QuantConnect code."""
         self.logger.info("Refining generated code")
diff --git a/quantcoder/core/processor.py b/quantcoder/core/processor.py
@@ -356,9 +356,14 @@ def extract_structure_and_generate_code(self, pdf_path: str) -> Dict:
     def generate_code_from_summary(self, summary_text: str) -> Optional[str]:
         """Generate QuantConnect code from a pre-existing summary.
 
-        Includes a fidelity assessment loop: after the code passes syntax
-        validation, mistral evaluates whether it faithfully implements the
-        summary.  If not, qwen regenerates with structured critique.
+        Uses a two-stage pipeline:
+          Stage 1 — generate QC framework with method stubs for novel math.
+          Stage 2 — fill stub methods with mathematical implementations.
+
+        Falls back to single-shot ``generate_qc_code()`` if Stage 1 fails.
+        Falls back to the Stage 1 framework if Stage 2 fails syntax checks.
+
+        After both stages, runs the fidelity assessment loop (unchanged).
 
         Args:
             summary_text: The strategy summary text
@@ -372,9 +377,15 @@ def generate_code_from_summary(self, summary_text: str) -> Optional[str]:
             self.logger.error("Empty summary provided")
             return None
 
-        # -- Phase 1: generate + syntax validation ---------------------------
-        qc_code = self.llm_handler.generate_qc_code(summary_text)
+        # -- Phase 1: two-stage code generation + syntax validation -----------
+
+        # Stage 1: framework with stubs (fall back to single-shot)
+        qc_code = self.llm_handler.generate_qc_framework(summary_text)
+        if not qc_code:
+            self.logger.warning("Stage 1 failed, falling back to single-shot generate_qc_code")
+            qc_code = self.llm_handler.generate_qc_code(summary_text)
 
+        # Syntax validation loop on Stage 1 output
         attempt = 0
         while qc_code and not self._validate_code(qc_code) and attempt < self.max_refine_attempts:
             self.logger.info(f"Syntax refine attempt {attempt + 1}")
@@ -388,7 +399,48 @@ def generate_code_from_summary(self, summary_text: str) -> Optional[str]:
             self.logger.error("Failed to generate syntactically valid code")
             return "QuantConnect code could not be generated successfully."
 
-        # -- Phase 2: fidelity assessment loop --------------------------------
+        # Stage 2: fill mathematical core (only if stubs detected)
+        framework_code = qc_code  # save as fallback anchor
+        if self._has_stub_methods(framework_code):
+            self.logger.info("Stubs detected — running Stage 2 (fill mathematical core)")
+            filled_code = self.llm_handler.fill_mathematical_core(
+                summary_text, framework_code
+            )
+
+            if filled_code:
+                # Syntax validation loop on Stage 2 output
+                s2_attempt = 0
+                while (
+                    not self._validate_code(filled_code)
+                    and s2_attempt < self.max_refine_attempts
+                ):
+                    self.logger.info(
+                        f"Stage 2 syntax refine attempt {s2_attempt + 1}"
+                    )
+                    refined = self.llm_handler.refine_code(filled_code)
+                    if refined and self._validate_code(refined):
+                        filled_code = refined
+                        break
+                    elif refined:
+                        filled_code = refined
+                    s2_attempt += 1
+
+                if self._validate_code(filled_code):
+                    self.logger.info("Stage 2 code is syntactically valid")
+                    qc_code = filled_code
+                else:
+                    self.logger.warning(
+                        "Stage 2 code failed syntax validation — "
+                        "keeping Stage 1 framework"
+                    )
+            else:
+                self.logger.warning(
+                    "Stage 2 returned no code — keeping Stage 1 framework"
+                )
+        else:
+            self.logger.info("No stubs detected — skipping Stage 2")
+
+        # -- Phase 2: fidelity assessment loop (unchanged) --------------------
         for fidelity_attempt in range(self.max_fidelity_attempts):
             self.logger.info(f"Fidelity assessment attempt {fidelity_attempt + 1}")
 
@@ -449,3 +501,50 @@ def _validate_code(self, code: str) -> bool:
         except Exception as e:
             self.logger.error(f"Validation error: {e}")
             return False
+
+    @staticmethod
+    def _has_stub_methods(code: str) -> bool:
+        """Detect whether *code* contains method stubs (def + docstring + pass).
+
+        A stub is a ``def``/``async def`` whose body is an optional docstring
+        followed only by ``pass``.  The source is parsed with ``ast`` so that
+        multi-line signatures, trailing comments, one-line ``def f(): pass``
+        and any docstring layout are recognised.  Text that does not parse is
+        scanned line by line instead (bare ``pass`` directly under a ``def``).
+
+        Args:
+            code: Python source text to inspect; empty or None yields False.
+
+        Returns:
+            True if at least one stub is found, otherwise False.
+        """
+        import ast  # local import: only this helper needs it
+        if not code:
+            return False
+        try:
+            tree = ast.parse(code)
+        except (SyntaxError, ValueError):
+            tree = None  # unparsable: use the line heuristic below
+        if tree is not None:
+            for node in ast.walk(tree):
+                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                    has_doc = ast.get_docstring(node, clean=False) is not None
+                    body = node.body[1:] if has_doc else node.body
+                    if body and all(isinstance(s, ast.Pass) for s in body):
+                        return True
+            return False
+        lines = code.splitlines()
+        for i, line in enumerate(lines):
+            if line.strip() != "pass":
+                continue
+            in_doc = False
+            for j in range(i - 1, -1, -1):
+                prev = lines[j].strip()
+                if prev.startswith(('"""', "'''")):
+                    if prev.count('"""') != 2 and prev.count("'''") != 2:
+                        in_doc = not in_doc  # multi-line docstring delimiter
+                elif not (in_doc or not prev or prev.startswith("#")):
+                    if prev.startswith("def ") and prev.endswith(":"):
+                        return True
+                    break
+        return False
diff --git a/tests/test_llm.py b/tests/test_llm.py
@@ -337,6 +337,101 @@ def test_malformed_response_returns_unfaithful(self, mock_config):
         assert result["score"] == 1
 
 
+class TestGenerateQCFramework:
+    """Unit tests for Stage 1, ``LLMHandler.generate_qc_framework``."""
+
+    def _make_handler(self, mock_config):
+        # Build the handler while LLMFactory is patched to hand out a fake provider.
+        fake_provider = MagicMock()
+        fake_provider.get_model_name.return_value = "qwen2.5-coder:14b"
+        fake_provider.chat = AsyncMock(return_value="test")
+        with patch("quantcoder.core.llm.LLMFactory") as factory:
+            factory.create.return_value = fake_provider
+            return LLMHandler(mock_config)
+
+    def test_returns_code_with_stubs(self, mock_config):
+        """Stage 1 output keeps the import line and the ``pass`` stub."""
+        llm_handler = self._make_handler(mock_config)
+        framework_src = (
+            "from AlgorithmImports import *\n"
+            "class OUAlgo(QCAlgorithm):\n"
+            "    def initialize(self): pass\n"
+            "    def _compute_ou_signal(self, prices):\n"
+            '        """Compute OU mean-reversion signal."""\n'
+            "        pass\n"
+        )
+        with patch("quantcoder.core.llm._run_async", return_value=framework_src):
+            generated = llm_handler.generate_qc_framework("OU mean reversion strategy")
+
+        assert generated is not None
+        assert "pass" in generated
+        assert "AlgorithmImports" in generated
+
+    def test_strips_markdown(self, mock_config):
+        """A fenced response comes back without the code fences."""
+        llm_handler = self._make_handler(mock_config)
+        fenced = "```python\ndef test(): pass\n```"
+        with patch("quantcoder.core.llm._run_async", return_value=fenced):
+            generated = llm_handler.generate_qc_framework("strategy")
+
+        assert "```" not in generated
+
+    def test_returns_none_on_failure(self, mock_config):
+        """An exception raised by the LLM call yields None."""
+        llm_handler = self._make_handler(mock_config)
+        with patch("quantcoder.core.llm._run_async", side_effect=Exception("timeout")):
+            generated = llm_handler.generate_qc_framework("strategy")
+
+        assert generated is None
+
+
+class TestFillMathematicalCore:
+    """Unit tests for Stage 2, ``LLMHandler.fill_mathematical_core``."""
+
+    def _make_handler(self, mock_config):
+        # Build the handler while LLMFactory is patched to hand out a fake provider.
+        fake_provider = MagicMock()
+        fake_provider.get_model_name.return_value = "qwen2.5-coder:14b"
+        fake_provider.chat = AsyncMock(return_value="test")
+        with patch("quantcoder.core.llm.LLMFactory") as factory:
+            factory.create.return_value = fake_provider
+            return LLMHandler(mock_config)
+
+    def test_returns_filled_code(self, mock_config):
+        """Stage 2 output is passed through with the numpy import intact."""
+        llm_handler = self._make_handler(mock_config)
+        completed_src = (
+            "from AlgorithmImports import *\n"
+            "import numpy as np\n"
+            "class OUAlgo(QCAlgorithm):\n"
+            "    def _compute_ou_signal(self, prices):\n"
+            "        log_prices = np.log(prices)\n"
+            "        return log_prices[-1] - np.mean(log_prices)\n"
+        )
+        with patch("quantcoder.core.llm._run_async", return_value=completed_src):
+            generated = llm_handler.fill_mathematical_core("OU strategy", "framework code")
+
+        assert generated is not None
+        assert "numpy" in generated
+
+    def test_strips_markdown(self, mock_config):
+        """A fenced response comes back without the code fences."""
+        llm_handler = self._make_handler(mock_config)
+        fenced = "```python\nimport numpy as np\n```"
+        with patch("quantcoder.core.llm._run_async", return_value=fenced):
+            generated = llm_handler.fill_mathematical_core("summary", "framework")
+
+        assert "```" not in generated
+
+    def test_returns_none_on_failure(self, mock_config):
+        """An exception raised by the LLM call yields None."""
+        llm_handler = self._make_handler(mock_config)
+        with patch("quantcoder.core.llm._run_async", side_effect=Exception("timeout")):
+            generated = llm_handler.fill_mathematical_core("summary", "framework")
+
+        assert generated is None
+
+
 class TestRegenerateWithCritique:
     """Tests for LLMHandler.regenerate_with_critique."""
 
diff --git a/tests/test_processor.py b/tests/test_processor.py