Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions astrbot/core/astr_main_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,13 +477,68 @@ async def _request_img_caption(
return llm_resp.completion_text


# Key in the event's extra data under which pre_caption_images() stores its
# outcome (caption text, "" for an empty caption, or None on failure);
# _ensure_img_caption() reads the same key.
_PRE_CAPTION_RESULT_KEY = "_pre_caption_result"


async def pre_caption_images(
    event: AstrMessageEvent,
    plugin_context: Context,
    cfg: dict,
) -> None:
    """Caption the event's images ahead of time and store the result on the event.

    Intended to be called by the pipeline before the session lock is
    acquired, so that the slow image-caption LLM call does not hold the
    lock and block later messages of the same session.

    Args:
        event: Message event whose ``Image`` components are captioned. The
            outcome is written to its extra data under
            ``_PRE_CAPTION_RESULT_KEY``.
        plugin_context: Context forwarded to ``_request_img_caption``.
        cfg: Provider settings. ``default_image_caption_provider_id`` selects
            the caption provider; the dict is also forwarded to the
            compression and caption helpers.

    Returns:
        None. Nothing is written when no caption provider is configured or
        the message carries no images. Otherwise the extra value is the
        caption text ("" when the caption is empty) or ``None`` on failure.
    """
    caption_provider_id: str = cfg.get("default_image_caption_provider_id") or ""
    if not caption_provider_id:
        # No caption provider configured: leave the event untouched.
        return

    images = [
        part for part in event.message_obj.message if isinstance(part, Image)
    ]
    if not images:
        return

    try:
        prepared_paths = []
        for image in images:
            source_path = await image.convert_to_file_path()
            prepared = await _compress_image_for_provider(source_path, cfg)
            # Register files reported as generated by compression as
            # temporary local files on the event.
            if _is_generated_compressed_image_path(source_path, prepared):
                event.track_temporary_local_file(prepared)
            prepared_paths.append(prepared)

        caption_text = await _request_img_caption(
            caption_provider_id,
            cfg,
            prepared_paths,
            plugin_context,
        )
        # Normalise a falsy caption to an empty string.
        stored_caption = caption_text or ""
        event.set_extra(_PRE_CAPTION_RESULT_KEY, stored_caption)
    except Exception as exc:  # noqa: BLE001
        # Best effort: record the failure as None rather than propagating,
        # so message processing can continue.
        logger.error("预处理图片描述失败: %s", exc, exc_info=True)
        event.set_extra(_PRE_CAPTION_RESULT_KEY, None)


async def _ensure_img_caption(
event: AstrMessageEvent,
req: ProviderRequest,
cfg: dict,
plugin_context: Context,
image_caption_provider: str,
) -> None:
if event.get_extra("_skip_img_caption"):
return

pre_caption = event.get_extra(_PRE_CAPTION_RESULT_KEY)
if pre_caption:
req.extra_user_content_parts.append(
TextPart(text=f"<image_caption>{pre_caption}</image_caption>")
)
req.image_urls = []
return

try:
compressed_urls = []
for url in req.image_urls:
Expand Down
6 changes: 6 additions & 0 deletions astrbot/core/config/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
"fallback_chat_models": [],
"default_image_caption_provider_id": "",
"image_caption_prompt": "Please describe the image using Chinese.",
"image_caption_wait_for_context_order": True,
"provider_pool": ["*"], # "*" 表示使用所有可用的提供者
"wake_prefix": "",
"web_search": False,
Expand Down Expand Up @@ -2979,6 +2980,11 @@ class ChatProviderTemplate(TypedDict):
"_special": "select_provider",
"hint": "留空代表不使用,可用于非多模态模型",
},
"provider_settings.image_caption_wait_for_context_order": {
"description": "图片转述时等待上下文顺序",
"type": "bool",
"hint": "开启后,同一会话中图片转述完成前,后续消息将等待,以保证上下文顺序正确;关闭后,后续消息立即响应,但上下文中图片描述可能在后续消息之后。",
},
"provider_stt_settings.enable": {
"description": "启用语音转文本",
"type": "bool",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
MainAgentBuildConfig,
MainAgentBuildResult,
build_main_agent,
pre_caption_images,
)
from astrbot.core.message.components import File, Image
from astrbot.core.message.message_event_result import (
Expand Down Expand Up @@ -186,6 +187,16 @@ async def process(
logger.warning("send_typing failed", exc_info=True)
await call_event_hook(event, EventType.OnWaitingLLMRequestEvent)

if not event.get_extra("provider_request"):
plugin_context = self.ctx.plugin_manager.context
cfg = plugin_context.get_config(umo=event.unified_msg_origin).get(
"provider_settings", {}
)
if cfg.get("image_caption_wait_for_context_order", True):
await pre_caption_images(event, plugin_context, cfg)
else:
event.set_extra("_skip_img_caption", True)

async with session_lock_manager.acquire_lock(event.unified_msg_origin):
logger.debug("acquired session lock for llm request")
agent_runner: AgentRunner | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@
},
"image_caption_prompt": {
"description": "Image Caption Prompt"
},
"image_caption_wait_for_context_order": {
"description": "Wait for Context Order on Image Caption",
"hint": "When enabled, subsequent messages in the same session will wait until image captioning completes, ensuring correct context order. When disabled, subsequent messages respond immediately but the image description may appear after them in context."
}
},
"provider_stt_settings": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@
},
"image_caption_prompt": {
"description": "图片转述提示词"
},
"image_caption_wait_for_context_order": {
"description": "图片转述时等待上下文顺序",
"hint": "开启后,同一会话中图片转述完成前,后续消息将等待,以保证上下文顺序正确;关闭后,后续消息立即响应,但上下文中图片描述可能在后续消息之后。"
}
},
"provider_stt_settings": {
Expand Down
Loading