AstrBotDevs · Reisenbug · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py
@@ -477,13 +477,68 @@ async def _request_img_caption(
     return llm_resp.completion_text
 
 
+_PRE_CAPTION_RESULT_KEY = "_pre_caption_result"
+
+
+async def pre_caption_images(
+    event: AstrMessageEvent,
+    plugin_context: Context,
+    cfg: dict,
+) -> None:
+    """Caption the event's images up front and store the result in event extras.
+
+    Intended to be called by the pipeline before it acquires the session lock, so
+    the slow image-caption LLM call does not hold the lock and block later messages.
+    """
+    img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or ""
+    if not img_cap_prov_id:
+        return  # no caption provider configured: nothing to do
+
+    image_components = [
+        comp for comp in event.message_obj.message if isinstance(comp, Image)
+    ]
+    if not image_components:
+        return  # message carries no Image components
+
+    try:
+        image_urls = []
+        for comp in image_components:
+            path = await comp.convert_to_file_path()
+            compressed = await _compress_image_for_provider(path, cfg)
+            if _is_generated_compressed_image_path(path, compressed):
+                event.track_temporary_local_file(compressed)
+            image_urls.append(compressed)
+
+        caption = await _request_img_caption(
+            img_cap_prov_id,
+            cfg,
+            image_urls,
+            plugin_context,
+        )
+        event.set_extra(_PRE_CAPTION_RESULT_KEY, caption or "")  # falsy -> ""
+    except Exception as exc:  # noqa: BLE001
+        logger.error("预处理图片描述失败: %s", exc, exc_info=True)
+        event.set_extra(_PRE_CAPTION_RESULT_KEY, None)  # failure stored as None
+
+
 async def _ensure_img_caption(
     event: AstrMessageEvent,
     req: ProviderRequest,
     cfg: dict,
     plugin_context: Context,
     image_caption_provider: str,
 ) -> None:
+    if event.get_extra("_skip_img_caption"):
+        return
+
+    pre_caption = event.get_extra(_PRE_CAPTION_RESULT_KEY)
+    if pre_caption:
+        req.extra_user_content_parts.append(
+            TextPart(text=f"<image_caption>{pre_caption}</image_caption>")
+        )
+        req.image_urls = []
+        return
+
     try:
         compressed_urls = []
         for url in req.image_urls:

diff --git a/astrbot/core/config/default.py b/astrbot/core/config/default.py
@@ -103,6 +103,7 @@
         "fallback_chat_models": [],
         "default_image_caption_provider_id": "",
         "image_caption_prompt": "Please describe the image using Chinese.",
+        "image_caption_wait_for_context_order": True,
         "provider_pool": ["*"],  # "*" 表示使用所有可用的提供者
         "wake_prefix": "",
         "web_search": False,
@@ -2979,6 +2980,11 @@ class ChatProviderTemplate(TypedDict):
                         "_special": "select_provider",
                         "hint": "留空代表不使用，可用于非多模态模型",
                     },
+                    "provider_settings.image_caption_wait_for_context_order": {
+                        "description": "图片转述时等待上下文顺序",
+                        "type": "bool",
+                        "hint": "开启后，同一会话中图片转述完成前，后续消息将等待，以保证上下文顺序正确；关闭后，后续消息立即响应，但上下文中图片描述可能在后续消息之后。",
+                    },
                     "provider_stt_settings.enable": {
                         "description": "启用语音转文本",
                         "type": "bool",

diff --git a/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py b/astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py
@@ -12,6 +12,7 @@
     MainAgentBuildConfig,
     MainAgentBuildResult,
     build_main_agent,
+    pre_caption_images,
 )
 from astrbot.core.message.components import File, Image
 from astrbot.core.message.message_event_result import (
@@ -186,6 +187,16 @@ async def process(
                 logger.warning("send_typing failed", exc_info=True)
             await call_event_hook(event, EventType.OnWaitingLLMRequestEvent)
 
+            if not event.get_extra("provider_request"):
+                plugin_context = self.ctx.plugin_manager.context
+                cfg = plugin_context.get_config(umo=event.unified_msg_origin).get(
+                    "provider_settings", {}
+                )
+                if cfg.get("image_caption_wait_for_context_order", True):
+                    await pre_caption_images(event, plugin_context, cfg)
+                else:
+                    event.set_extra("_skip_img_caption", True)
+
             async with session_lock_manager.acquire_lock(event.unified_msg_origin):
                 logger.debug("acquired session lock for llm request")
                 agent_runner: AgentRunner | None = None

diff --git a/dashboard/src/i18n/locales/en-US/features/config-metadata.json b/dashboard/src/i18n/locales/en-US/features/config-metadata.json
@@ -51,6 +51,10 @@
         },
         "image_caption_prompt": {
           "description": "Image Caption Prompt"
+        },
+        "image_caption_wait_for_context_order": {
+          "description": "Wait for Context Order on Image Caption",
+          "hint": "When enabled, subsequent messages in the same session will wait until image captioning completes, ensuring correct context order. When disabled, subsequent messages respond immediately but the image description may appear after them in context."
         }
       },
       "provider_stt_settings": {

diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json
@@ -51,6 +51,10 @@
         },
         "image_caption_prompt": {
           "description": "图片转述提示词"
+        },
+        "image_caption_wait_for_context_order": {
+          "description": "图片转述时等待上下文顺序",
+          "hint": "开启后，同一会话中图片转述完成前，后续消息将等待，以保证上下文顺序正确；关闭后，后续消息立即响应，但上下文中图片描述可能在后续消息之后。"
         }
       },
       "provider_stt_settings": {