microsoft · Akhileswara-Microsoft · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
@@ -1,3 +1,3 @@
-azure-ai-projects==2.0.0b3
+azure-ai-projects==2.1.0
 azure-identity==1.20.0
 ansible-core~=2.17.0
@@ -22,14 +22,11 @@
 from typing import AsyncIterator, Optional, cast
 
 from agent_framework import (
-    ChatMessage,
-    HandoffBuilder,
-    HandoffAgentUserRequest,
-    RequestInfoEvent,
-    WorkflowOutputEvent,
-    WorkflowStatusEvent,
+    Agent,
+    Message,
 )
-from agent_framework.azure import AzureOpenAIChatClient
+from agent_framework.orchestrations import HandoffBuilder, HandoffAgentUserRequest
+from agent_framework.openai import OpenAIChatCompletionClient
 from azure.identity import DefaultAzureCredential
 
 # Foundry imports - only used when USE_FOUNDRY=true
@@ -485,7 +482,7 @@ class ContentGenerationOrchestrator:
     Microsoft Agent Framework's HandoffBuilder.
 
     Supports two modes:
-    1. Azure OpenAI Direct (default): Uses AzureOpenAIChatClient with ad_token_provider
+    1. Azure OpenAI Direct (default): Uses OpenAIChatCompletionClient with DefaultAzureCredential
     2. Azure AI Foundry: Uses AIProjectClient with project endpoint (set USE_FOUNDRY=true)
 
     Agents:
@@ -498,7 +495,7 @@ class ContentGenerationOrchestrator:
     """
 
     def __init__(self):
-        self._chat_client = None  # Always AzureOpenAIChatClient
+        self._chat_client = None  # OpenAIChatCompletionClient instance
         self._project_client = None  # AIProjectClient for Foundry mode (used for image generation)
         self._agents: dict = {}
         self._rai_agent = None
@@ -537,44 +534,32 @@ def _get_chat_client(self):
                 self._project_client = project_client
 
                 # For chat completions, use the direct Azure OpenAI endpoint
-                # The Foundry project uses Azure OpenAI under the hood, and we need the direct endpoint
-                # to properly authenticate with Cognitive Services token
                 azure_endpoint = app_settings.azure_openai.endpoint
                 if not azure_endpoint:
                     raise ValueError("AZURE_OPENAI_ENDPOINT is required for Foundry mode chat completions")
 
-                def get_token() -> str:
-                    """Token provider callable - invoked for each request to ensure fresh tokens."""
-                    token = self._credential.get_token(TOKEN_ENDPOINT)
-                    return token.token
-
                 model_deployment = app_settings.ai_foundry.model_deployment or app_settings.azure_openai.gpt_model
                 api_version = app_settings.azure_openai.api_version
 
                 logger.info(f"Foundry mode using Azure OpenAI endpoint: {azure_endpoint}, deployment: {model_deployment}")
-                self._chat_client = AzureOpenAIChatClient(
-                    endpoint=azure_endpoint,
-                    deployment_name=model_deployment,
+                self._chat_client = OpenAIChatCompletionClient(
+                    azure_endpoint=azure_endpoint,
+                    model=model_deployment,
                     api_version=api_version,
-                    ad_token_provider=get_token,
+                    credential=self._credential,
                 )
             else:
                 # Azure OpenAI Direct mode
                 endpoint = app_settings.azure_openai.endpoint
                 if not endpoint:
                     raise ValueError("AZURE_OPENAI_ENDPOINT is not configured")
 
-                def get_token() -> str:
-                    """Token provider callable - invoked for each request to ensure fresh tokens."""
-                    token = self._credential.get_token(TOKEN_ENDPOINT)
-                    return token.token
-
-                logger.info("Using Azure OpenAI Direct mode with ad_token_provider")
-                self._chat_client = AzureOpenAIChatClient(
-                    endpoint=endpoint,
-                    deployment_name=app_settings.azure_openai.gpt_model,
+                logger.info("Using Azure OpenAI Direct mode with credential")
+                self._chat_client = OpenAIChatCompletionClient(
+                    azure_endpoint=endpoint,
+                    model=app_settings.azure_openai.gpt_model,
                     api_version=app_settings.azure_openai.api_version,
-                    ad_token_provider=get_token,
+                    credential=self._credential,
                 )
         return self._chat_client
 
@@ -589,40 +574,57 @@ def initialize(self) -> None:
         # Get the chat client
         chat_client = self._get_chat_client()
 
-        # Agent names - use underscores (AzureOpenAIChatClient works with both modes now)
+        # Agent names - use underscores (OpenAIChatCompletionClient works with both modes now)
         name_sep = "_"
 
         # Create all agents
-        triage_agent = chat_client.create_agent(
+        # NOTE: Handoff workflow participants must set
+        # require_per_service_call_history_persistence=True so local conversation
+        # history stays consistent with the service across handoff tool-call
+        # short-circuits (required by agent_framework.orchestrations.HandoffBuilder).
+        triage_agent = Agent(
+            client=chat_client,
             name=f"triage{name_sep}agent",
             instructions=TRIAGE_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        planning_agent = chat_client.create_agent(
+        planning_agent = Agent(
+            client=chat_client,
             name=f"planning{name_sep}agent",
             instructions=PLANNING_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        research_agent = chat_client.create_agent(
+        research_agent = Agent(
+            client=chat_client,
             name=f"research{name_sep}agent",
             instructions=RESEARCH_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        text_content_agent = chat_client.create_agent(
+        text_content_agent = Agent(
+            client=chat_client,
             name=f"text{name_sep}content{name_sep}agent",
             instructions=TEXT_CONTENT_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        image_content_agent = chat_client.create_agent(
+        image_content_agent = Agent(
+            client=chat_client,
             name=f"image{name_sep}content{name_sep}agent",
             instructions=IMAGE_CONTENT_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
 
-        compliance_agent = chat_client.create_agent(
+        compliance_agent = Agent(
+            client=chat_client,
             name=f"compliance{name_sep}agent",
             instructions=COMPLIANCE_INSTRUCTIONS,
+            require_per_service_call_history_persistence=True,
         )
-        self._rai_agent = chat_client.create_agent(
+        self._rai_agent = Agent(
+            client=chat_client,
             name=f"rai{name_sep}agent",
             instructions=RAI_INSTRUCTIONS,
         )
@@ -636,7 +638,7 @@ def initialize(self) -> None:
             "compliance": compliance_agent,
         }
 
-        # Workflow name - Foundry requires hyphens
+        # Workflow name
         workflow_name = f"content{name_sep}generation{name_sep}workflow"
 
         # Build the handoff workflow
@@ -736,15 +738,15 @@ async def process_message(
                 events.append(event)
 
                 # Handle different event types from the workflow
-                if isinstance(event, WorkflowStatusEvent):
+                if event.type == "status":
                     yield {
                         "type": "status",
-                        "content": event.state.name,
+                        "content": event.state.name if hasattr(event, 'state') else str(event.data),
                         "is_final": False,
                         "metadata": {"conversation_id": conversation_id}
                     }
 
-                elif isinstance(event, RequestInfoEvent):
+                elif event.type == "request_info":
                     # Workflow is requesting user input
                     if isinstance(event.data, HandoffAgentUserRequest):
                         # Extract conversation history from agent_response.messages (updated API)
@@ -773,9 +775,9 @@ async def process_message(
                             "metadata": {"conversation_id": conversation_id}
                         }
 
-                elif isinstance(event, WorkflowOutputEvent):
+                elif event.type == "output":
                     # Final output from the workflow
-                    conversation = cast(list[ChatMessage], event.data)
+                    conversation = cast(list[Message], event.data)
                     if isinstance(conversation, list) and conversation:
                         # Get the last assistant message as the final response
                         assistant_messages = [
@@ -841,15 +843,15 @@ async def send_user_response(
         try:
             responses = {request_id: user_response}
             async for event in self._workflow.send_responses_streaming(responses):
-                if isinstance(event, WorkflowStatusEvent):
+                if event.type == "status":
                     yield {
                         "type": "status",
-                        "content": event.state.name,
+                        "content": event.state.name if hasattr(event, 'state') else str(event.data),
                         "is_final": False,
                         "metadata": {"conversation_id": conversation_id}
                     }
 
-                elif isinstance(event, RequestInfoEvent):
+                elif event.type == "request_info":
                     if isinstance(event.data, HandoffAgentUserRequest):
                         # Get messages from agent_response (updated API)
                         messages = event.data.agent_response.messages if hasattr(event.data, 'agent_response') and event.data.agent_response else []
@@ -871,8 +873,8 @@ async def send_user_response(
                             "metadata": {"conversation_id": conversation_id}
                         }
 
-                elif isinstance(event, WorkflowOutputEvent):
-                    conversation = cast(list[ChatMessage], event.data)
+                elif event.type == "output":
+                    conversation = cast(list[Message], event.data)
                     if isinstance(conversation, list) and conversation:
                         assistant_messages = [
                             msg for msg in conversation

@@ -6,8 +6,9 @@ quart-cors>=0.7.0
 hypercorn>=0.17.0
 
 # Microsoft Agent Framework
-agent-framework-azure-ai==1.0.0b260114
-agent-framework-core==1.0.0b260114
+agent-framework-foundry==1.1.1
+agent-framework-core==1.1.1
+agent-framework-orchestrations==1.0.0b260421
 
 # OpenTelemetry (required by agent-framework)
 opentelemetry-semantic-conventions-ai==0.4.13
@@ -18,7 +19,7 @@ azure-cosmos>=4.7.0
 azure-storage-blob>=12.22.0
 azure-search-documents>=11.4.0
 azure-ai-contentsafety>=1.0.0
-azure-ai-projects==2.0.0b3  # Azure AI Foundry SDK (optional, for USE_FOUNDRY=true)
+azure-ai-projects==2.1.0  # Azure AI Foundry SDK (optional, for USE_FOUNDRY=true)
 
 # OpenAI
 openai>=1.45.0

@@ -9,16 +9,14 @@
 import re
 from typing import Optional
 
-from agent_framework.azure import AzureOpenAIChatClient
+from agent_framework import Agent
+from agent_framework.openai import OpenAIChatCompletionClient
 from azure.identity import DefaultAzureCredential
 
 from settings import app_settings
 
 logger = logging.getLogger(__name__)
 
-# Token endpoint for Azure OpenAI authentication
-TOKEN_ENDPOINT = "https://cognitiveservices.azure.com/.default"
-
 # Title generation instructions (from MS reference accelerator)
 TITLE_INSTRUCTIONS = """Summarize the conversation so far into a 4-word or less title.
 Do not use any quotation marks or punctuation.
@@ -57,20 +55,15 @@ def initialize(self) -> None:
 
             api_version = app_settings.azure_openai.api_version
 
-            # Create token provider function
-            def get_token() -> str:
-                """Token provider callable - invoked for each request to ensure fresh tokens."""
-                token = self._credential.get_token(TOKEN_ENDPOINT)
-                return token.token
-
-            chat_client = AzureOpenAIChatClient(
-                endpoint=endpoint,
-                deployment_name=deployment,
+            chat_client = OpenAIChatCompletionClient(
+                azure_endpoint=endpoint,
+                model=deployment,
                 api_version=api_version,
-                ad_token_provider=get_token,
+                credential=self._credential,
             )
 
-            self._agent = chat_client.create_agent(
+            self._agent = Agent(
+                client=chat_client,
                 name="title_agent",
                 instructions=TITLE_INSTRUCTIONS,
             )