aitomatic · ngoclam9415 · May 10, 2026 · May 6, 2026 · May 7, 2026 · May 7, 2026
diff --git a/dana/common/llm/providers/azure.py b/dana/common/llm/providers/azure.py
@@ -13,6 +13,22 @@
 class AzureProvider(OpenAICompatibleProvider):
     """Azure OpenAI provider."""
 
+    # Azure exposes the Responses API only on api-version >= this date.
+    # Older versions return HTTP 400 BadRequest for /openai/responses.
+    _RESPONSES_API_MIN_DATE = "2025-03-01"
+
+    # Env var operators use to dial reasoning effort for Azure deployments.
+    # Valid values: "minimal" | "low" | "medium" | "high".
+    _REASONING_EFFORT_ENV_VAR = "AZURE_THINKING_EFFORT"
+
+    def _responses_api_supported(self) -> bool:
+        """Return True only if ``api_version`` ("YYYY-MM-DD" or "YYYY-MM-DD-preview") is dated >= ``_RESPONSES_API_MIN_DATE``."""
+        # Zero-padded ISO dates sort lexicographically, so only a date-shaped prefix is compared; anything else is unsupported.
+        version = getattr(self, "api_version", None)
+        head = version[:10] if isinstance(version, str) else ""
+        is_date = len(head) == 10 and head[4] == head[7] == "-" and set(head[:4] + head[5:7] + head[8:]) <= set("0123456789")
+        return is_date and head >= self._RESPONSES_API_MIN_DATE
+
     def __init__(
         self, api_key: str | None = None, model: str = "gpt-35-turbo", base_url: str | None = None, api_version: str | None = None
     ):

diff --git a/dana/common/llm/providers/openai.py b/dana/common/llm/providers/openai.py
@@ -13,6 +13,10 @@
 class OpenAIProvider(OpenAICompatibleProvider):
     """OpenAI API provider."""
 
+    # Env var operators use to dial reasoning effort for OpenAI direct API.
+    # Valid values: "minimal" | "low" | "medium" | "high".
+    _REASONING_EFFORT_ENV_VAR = "OPENAI_THINKING_EFFORT"
+
     def __init__(self, api_key: str | None = None, model: str = "gpt-3.5-turbo", base_url: str | None = None):
         self.model = model
 

diff --git a/dana/common/llm/providers/openai_compatible_base.py b/dana/common/llm/providers/openai_compatible_base.py
diff --git a/dana/core/agent/star_agent.py b/dana/core/agent/star_agent.py
@@ -748,6 +748,18 @@ def _record_think_results(
             output_state = "exit"
 
         if not tool_calls or len(tool_calls) == 0:
+            # Persist reasoning even on direct-answer turns. Without this, the
+            # model's internal reasoning (LLMResponse.reasoning_content for
+            # gpt-5/o3/o4, or <thinking> tags / JSON reasoning fields for other
+            # codecs) is silently dropped whenever the agent answers without
+            # invoking a tool. Same emit pattern as the tool-calls branch below.
+            if reasoning and len(reasoning) > 0:
+                timeline.add_entry(
+                    TimelineEntry(
+                        entry_type=TimelineEntryType.AGENT_THOUGHTS,
+                        content=reasoning,
+                    )
+                )
             response = response if (response and len(response) > 0) else "No response generated"
             timeline.add_entry(
                 TimelineEntry(

diff --git a/scripts/verify-azure-thinking.py b/scripts/verify-azure-thinking.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""Verify that dana.common.llm surfaces thinking/reasoning for Azure gpt-5.2.
+
+Checks all three surfaces:
+  - streaming: at least one LLMStreamChunk(type="thinking") yielded
+  - non-streaming reasoning_tokens count returned in usage details
+  - non-streaming reasoning_content text populated (Responses API path)
+
+Both stream() and chat() now route to the Responses API for gpt-5/o3/o4
+when api-version supports it.
+
+Run:
+    uv run python scripts/verify-azure-thinking.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+from pathlib import Path
+import sys
+
+from dotenv import load_dotenv
+
+
+ROOT = Path(__file__).resolve().parent.parent  # directory one level above this script's folder
+load_dotenv(ROOT / ".env", override=False)  # override=False: variables already set in the environment are kept
+
+from dana.common.llm.providers.azure import AzureProvider  # noqa: E402
+from dana.common.llm.types import LLMMessage  # noqa: E402
+
+
+MODEL = os.getenv("AZURE_MODEL", "gpt-5.2")  # model passed to AzureProvider; override with AZURE_MODEL
+
+PROMPT = (  # single user message sent by both the streaming and the non-streaming check
+    "You have 3 boxes labeled A, B, C. One holds gold, two are empty. "
+    "B's label says 'gold is in A'. C's label says 'gold is not here'. "
+    "Exactly one label is true. Where is the gold? Reason step by step, then answer."
+)
+
+
+def _hr(title: str) -> None:
+    print("\n" + "=" * 8, title, "=" * 8)  # section banner: blank line, then the title between two runs of eight '='
+
+
+async def check_streaming(provider: AzureProvider) -> dict:
+    """Stream PROMPT through *provider*, print a summary, and return thinking/text chunk statistics."""
+    _hr(f"STREAM: {MODEL} (expect Responses API + thinking chunks)")
+    routed = provider._should_use_responses_api()
+    print(f"_should_use_responses_api()={routed}")
+
+    # Payloads are kept only for the two chunk types inspected below; every chunk type is tallied.
+    collected: dict[str, list[str]] = {"thinking": [], "text_delta": []}
+    tally: dict[str, int] = {}
+
+    stream_kwargs = {"reasoning": {"effort": "medium"}}  # wrapper now adds summary="auto"
+    print(f"stream kwargs: {stream_kwargs} (wrapper merges summary='auto')")
+    async for chunk in provider.stream(messages=[LLMMessage(role="user", content=PROMPT)], **stream_kwargs):
+        tally[chunk.type] = tally.get(chunk.type, 0) + 1
+        if chunk.type in collected:
+            collected[chunk.type].append(chunk.content or "")
+
+    thinking_text = "".join(collected["thinking"])
+    answer_text = "".join(collected["text_delta"])
+    print(f"chunk type counts: {tally}")
+    if collected["thinking"]:
+        preview = thinking_text[:300].replace("\n", " ")
+        print(f"thinking preview ({len(thinking_text)} chars): {preview!r}")
+    print(f"final text ({len(answer_text)} chars): {answer_text[:200]!r}...")
+    return {
+        "uses_responses_api": routed,
+        "thinking_chunk_count": len(collected["thinking"]),
+        "thinking_chars": len(thinking_text),
+        "text_chars": len(answer_text),
+    }
+
+
+async def check_nonstreaming(provider: AzureProvider) -> dict:
+    """Send PROMPT through provider.chat(), print the reasoning-related fields, and return summary counts."""
+    _hr(f"CHAT: {MODEL} (now Responses API — expect reasoning_content populated)")
+    # gpt-5* + supported api-version → chat() goes to the Responses API; reasoning is passed so the model actually reasons.
+    chat_kwargs = {"reasoning": {"effort": "medium"}}
+    print(f"chat kwargs: {chat_kwargs}")
+    resp = await provider.chat(messages=[LLMMessage(role="user", content=PROMPT)], **chat_kwargs)
+    for field in ("finish_reason", "usage", "reasoning_tokens"):
+        print(f"{field}={getattr(resp, field)}")
+    reasoning_text = resp.reasoning_content
+    if not reasoning_text:
+        print("reasoning_content=None")
+    else:
+        preview = reasoning_text[:300].replace("\n", " ")
+        print(f"reasoning_content ({len(reasoning_text)} chars): {preview!r}...")
+    print(f"content[:200]={(resp.content or '')[:200]!r}")
+    return {
+        "reasoning_tokens": resp.reasoning_tokens,
+        "reasoning_content_present": bool(reasoning_text),
+        "reasoning_content_chars": len(reasoning_text or ""),
+        "content_chars": len(resp.content or ""),
+    }
+
+
+async def main() -> int:
+    """Run the streaming and non-streaming checks against Azure and print a PASS/FAIL line for each.
+
+    Returns 2 if AZURE_OPENAI_API_KEY is unset, 0 if all three checks pass, otherwise 1.
+    """
+    if not os.getenv("AZURE_OPENAI_API_KEY"):
+        print("ERROR: AZURE_OPENAI_API_KEY not set", file=sys.stderr)
+        return 2
+
+    provider = AzureProvider(model=MODEL, api_version=os.getenv("AZURE_RESPONSES_API_VERSION", "2025-04-01-preview"))
+    print(f"deployment={provider.deployment_name}  api_version={provider.api_version}")
+
+    streamed = await check_streaming(provider)
+    chatted = await check_nonstreaming(provider)
+
+    _hr("VERDICT")
+    stream_ok = streamed["uses_responses_api"] and streamed["thinking_chunk_count"] > 0
+    tokens_ok = (chatted["reasoning_tokens"] or 0) > 0
+    text_ok = chatted["reasoning_content_present"]
+    print(f"streaming thinking blocks ............. {'PASS' if stream_ok else 'FAIL'}  ({streamed['thinking_chunk_count']} chunks, {streamed['thinking_chars']} chars)")
+    print(f"non-streaming reasoning_tokens > 0 .... {'PASS' if tokens_ok else 'FAIL'}  (tokens={chatted['reasoning_tokens']})")
+    print(f"non-streaming reasoning_content text .. {'PASS' if text_ok else 'FAIL'}  ({chatted['reasoning_content_chars']} chars)")
+
+    if stream_ok and tokens_ok and text_ok:
+        return 0
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))  # main()'s return value becomes the process exit status
diff --git a/scripts/verify-thinking-persisted-via-coding-agent.py b/scripts/verify-thinking-persisted-via-coding-agent.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""End-to-end check: does DanaCodingAgent persist gpt-5 reasoning to timeline.json?
+
+Spins up DanaCodingAgent against Azure gpt-5.2, asks a reasoning-heavy question,
+then loads the persisted timeline.json and inspects AGENT_THOUGHTS entries to
+verify the model's internal reasoning (LLMResponse.reasoning_content) made it
+into durable storage.
+
+Path layout (per LocalTimelineRepository):
+    <CWD>/.dana/dana_agent/<agent.object_id>/sessions/<session_id>/timeline.json
+
+Run:
+    uv run python scripts/verify-thinking-persisted-via-coding-agent.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from pathlib import Path
+import sys
+import tempfile
+
+from dotenv import load_dotenv
+
+
+ROOT = Path(__file__).resolve().parent.parent  # directory one level above this script's folder
+load_dotenv(ROOT / ".env", override=False)  # override=False: variables already set in the environment are kept
+
+# Force an api-version that supports the Responses API (>= 2025-03-01-preview); this overwrites any
+# existing AZURE_OPENAI_API_VERSION. AZURE_RESPONSES_API_VERSION, when set, replaces the default below.
+# Without this the wrapper falls back to Chat Completions and reasoning text is never returned, even on gpt-5.
+os.environ["AZURE_OPENAI_API_VERSION"] = os.environ.get("AZURE_RESPONSES_API_VERSION", "2025-04-01-preview")
+
+# Imports must come AFTER env override.
+from dana.common.llm.providers.openai_compatible_base import OpenAICompatibleProvider  # noqa: E402
+from dana.core.agent.builtin_agents.dana_coding_agent import DanaCodingAgent  # noqa: E402
+
+
+# Instrumentation: log reasoning_content size on every call so it's easy to see
+# what the wrapper produced vs what the agent persisted.
+_orig_chat_via_responses = OpenAICompatibleProvider._chat_via_responses  # original kept so the logging wrapper can delegate
+
+
+async def _logged_chat_via_responses(self, messages, tools=None, **kwargs):
+    """Delegate to the original ``_chat_via_responses``, print the sizes of its result, and return it unchanged."""
+    result = await _orig_chat_via_responses(self, messages, tools, **kwargs)
+    reasoning_chars = len(result.reasoning_content or "")
+    reasoning_tokens = result.reasoning_tokens or 0
+    content_chars = len(result.content or "")
+    summary = f"reasoning_content={reasoning_chars} chars, reasoning_tokens={reasoning_tokens}, content={content_chars} chars"
+    print(f"[INSTR] _chat_via_responses → {summary}")
+    return result
+
+
+OpenAICompatibleProvider._chat_via_responses = _logged_chat_via_responses  # class-level patch: swap in the logging wrapper
+
+
+PROMPT_DIRECT = (  # logic puzzle; the prompt text itself asks the model not to use tools
+    "You have 3 boxes labeled A, B, C. One holds gold, two are empty. "
+    "B's label says 'gold is in A'. C's label says 'gold is not here'. "
+    "Exactly one label is true. Where is the gold? Reason step by step, then answer. "
+    "Do NOT use any tools — answer directly from reasoning."
+)
+PROMPT_TOOL = (  # asks for exactly one bash tool call (`ls -lt`) before the answer
+    "Reason carefully about which file in the current directory is the most recent, "
+    "then use the bash tool exactly once to list files (`ls -lt`) to confirm. "
+    "Then state the answer."
+)
+SCENARIO = os.getenv("SCENARIO", "direct")  # "direct" (default) or "tool"
+PROMPT = PROMPT_TOOL if SCENARIO == "tool" else PROMPT_DIRECT  # any value other than "tool" selects PROMPT_DIRECT
+
+AGENT_ID = "dana-coding-agent-thinking-test"  # agent_id passed to DanaCodingAgent in main()
+
+
+def _hr(title: str) -> None:
+    print("\n" + "=" * 8, title, "=" * 8)  # section banner: blank line, then the title between two runs of eight '='
+
+
+def _find_timeline(session_id: str) -> Path | None:
+    """Return the first ``*/sessions/<session_id>/timeline.json`` found under ``<process cwd>/.dana/dana_agent``, else None."""
+    matches = [*(Path.cwd() / ".dana" / "dana_agent").glob(f"*/sessions/{session_id}/timeline.json")]
+    return next(iter(matches), None)
+
+
+def _print_thought_summary(entries: list[dict]) -> tuple[int, int]:
+    """Print each ``agent_thoughts`` entry; return (number of such entries, total chars of their string contents)."""
+    thoughts = list(filter(lambda entry: entry.get("type") == "agent_thoughts", entries))
+    print(f"\nAGENT_THOUGHTS entries: {len(thoughts)}")
+    char_total = 0
+    for index, thought in enumerate(thoughts):
+        body = thought.get("content", "")
+        if isinstance(body, str):
+            char_total += len(body)
+            flattened = body[:200].replace("\n", " ")
+            print(f"  [{index}] {len(body)} chars: {flattened!r}")
+        else:  # non-string content is reported and counted as an entry, but adds no characters
+            print(f"  [{index}] non-string content: {type(body).__name__}")
+    return len(thoughts), char_total
+
+
+async def main() -> int:
+    """Run one DanaCodingAgent query and check that AGENT_THOUGHTS reached the persisted timeline.
+
+    Returns 2 if AZURE_OPENAI_API_KEY is unset, 1 if no timeline.json is found or the
+    agent_thoughts entries total 50 characters or fewer, and 0 otherwise.
+    """
+    if not os.getenv("AZURE_OPENAI_API_KEY"):
+        print("ERROR: AZURE_OPENAI_API_KEY not set", file=sys.stderr)
+        return 2
+
+    workdir = tempfile.mkdtemp(prefix="dana_thinking_test_")
+    print(f"agent cwd: {workdir}")
+    print(f"api-version: {os.environ['AZURE_OPENAI_API_VERSION']}")
+    print(f"scenario: {SCENARIO}")
+
+    agent = DanaCodingAgent(
+        agent_id=AGENT_ID, agent_type="dana_coding_agent", llm_provider="azure",
+        model=os.getenv("AZURE_MODEL", "gpt-5.2"), cwd=workdir,
+    )
+    print(f"session_id: {agent._session_id}")
+
+    _hr("RUNNING aquery")
+    answer = await agent.aquery(message=PROMPT)
+    print(f"answer[:200]: {str(answer or '')[:200]!r}")
+
+    _hr("LOADING TIMELINE")
+    timeline_path = _find_timeline(agent._session_id)
+    if timeline_path is None:
+        print(f"ERROR: no timeline.json found for session {agent._session_id}")
+        return 1
+    print(f"timeline: {timeline_path}")
+
+    entries = json.loads(timeline_path.read_text()).get("entries", [])
+    print(f"total entries: {len(entries)}")
+    entry_types = {}
+    for entry in entries:
+        kind = entry.get("type", "?")
+        entry_types[kind] = entry_types.get(kind, 0) + 1
+    print(f"entry type counts: {entry_types}")
+
+    thought_count, thought_chars = _print_thought_summary(entries)
+
+    _hr("VERDICT")
+    if thought_count <= 0 or thought_chars <= 50:
+        print("FAIL — no substantive AGENT_THOUGHTS entry in timeline")
+        print("\nLikely causes:")
+        print("  1. gpt-5.2 chose not to reason on this turn (nondeterministic without explicit effort)")
+        print("  2. Wrapper routed to Chat Completions (api-version too old?)")
+        print("  3. Codec doesn't read response.reasoning_content (only codec_with_native_tool_use does)")
+        print(f"\nInspect: cat {timeline_path}")
+        return 1
+    print(f"PASS — reasoning persisted as AGENT_THOUGHTS ({thought_count} entries, {thought_chars} chars)")
+    print(f"\nInspect: cat {timeline_path}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))  # main()'s return value becomes the process exit status