2 changes: 2 additions & 0 deletions lmdeploy/serve/openai/api_server.py
@@ -409,6 +409,8 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
    error_check_ret = check_request(request)
    if error_check_ret is not None:
        return error_check_ret
    # give the registered tool parser a chance to rewrite the request before a session is created
    if VariableInterface.tool_parser is not None:
        request = VariableInterface.tool_parser.adjust_request(request)
    session = VariableInterface.get_session(request.session_id)

    json_request = await raw_request.json()
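The server-side change above is the whole integration point: when a tool parser is registered, it now gets a chance to rewrite the incoming request before a session is created. Below is a minimal sketch of the parser-side contract this relies on; the pass-through default is an assumption for illustration, not code from this diff (the diff only shows the Qwen3Coder override).

# Sketch only: the contract the api_server hook assumes. A parser that needs no
# request rewriting should return the request unchanged, so the hook stays a
# no-op for parsers that do not customise adjust_request.
from lmdeploy.serve.openai.protocol import ChatCompletionRequest


class PassThroughToolParser:
    """Hypothetical minimal parser illustrating the adjust_request contract."""

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        # Default behaviour: no changes. Qwen3CoderToolParser overrides this to
        # decode stringified tool-call arguments into dicts (see below).
        return request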
64 changes: 64 additions & 0 deletions lmdeploy/serve/openai/tool_parser/qwen3coder_parser.py
@@ -23,6 +23,20 @@
logger = get_logger('lmdeploy')


def _parse_tool_call_arguments_dict(arguments: Any) -> dict[str, Any] | None:
    """Return dict-like tool arguments for Qwen3Coder request rendering."""
    if not isinstance(arguments, str):
        return None

    try:
        parsed_arguments = json.loads(arguments)
    except (json.JSONDecodeError, TypeError):
        return None
    if isinstance(parsed_arguments, dict):
        return parsed_arguments
    return None


@dataclass
class ParserState:
"""Maintains the state of parsing during tool call extraction."""
@@ -56,6 +70,56 @@ def __init__(self, tokenizer: object):

        self.tool_call_pat = re.compile(r'\n*<tool_call>(.*?)</tool_call>', re.DOTALL)

    def _normalize_request_messages(self, messages: list[dict]) -> list[dict] | None:
        """Return a render-safe copy of request messages when needed."""
        normalized_messages = None

        for msg_idx, message in enumerate(messages):
            if not isinstance(message, dict) or message.get('role') != 'assistant':
                continue
            tool_calls = message.get('tool_calls')
            if not isinstance(tool_calls, list):
                continue

            normalized_tool_calls = None
            for tool_idx, tool_call in enumerate(tool_calls):
                if not isinstance(tool_call, dict):
                    continue
                function = tool_call.get('function')
                if not isinstance(function, dict) or isinstance(function.get('arguments'), dict):
                    continue

                parsed_arguments = _parse_tool_call_arguments_dict(function.get('arguments'))
                if parsed_arguments is None:
                    continue

                # copy containers lazily so requests that need no change are left untouched
                if normalized_messages is None:
                    normalized_messages = list(messages)
                if normalized_tool_calls is None:
                    normalized_tool_calls = list(tool_calls)
                normalized_message = dict(message)
                normalized_message['tool_calls'] = normalized_tool_calls
                normalized_messages[msg_idx] = normalized_message

                normalized_function = dict(function)
                normalized_function['arguments'] = parsed_arguments

                normalized_tool_call = dict(tool_call)
                normalized_tool_call['function'] = normalized_function
                normalized_tool_calls[tool_idx] = normalized_tool_call

        return normalized_messages

    def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
        """Return a request whose assistant tool-call arguments are dicts rather than JSON strings."""
        messages = request.messages
        if not isinstance(messages, list):
            return request

        normalized_messages = self._normalize_request_messages(messages)
        if normalized_messages is None:
            return request
        return request.model_copy(update={'messages': normalized_messages})

    def _split(self, parser_state: ParserState, parsing_content: str) -> tuple[str, str, bool]:
        """Split content into tuple: (text_content, tool_content, has_tool_end)"""
        try:
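For context on why the string-to-dict conversion matters: the Qwen3-Coder style rendering expands each tool call into <function=...> and <parameter=...> blocks by iterating over the arguments as a mapping, which is exactly the shape the template assertions in the tests below check for. The following is a toy sketch of that rendering step, written only to illustrate the assumption; it is not lmdeploy's actual template code.

# Toy illustration (not the real chat template): rendering a tool call requires
# dict arguments so each key/value pair can become a <parameter=...> block.
def render_tool_call(name: str, arguments: dict) -> str:
    lines = [f'<function={name}>']
    for key, value in arguments.items():  # a raw JSON string would not iterate as key/value pairs
        lines.append(f'<parameter={key}>\n{value}\n</parameter>')
    lines.append('</function>')
    return '\n'.join(lines)


print(render_tool_call('get_weather', {'city': 'Paris', 'units': 'metric'}))
# <function=get_weather>
# <parameter=city>
# Paris
# </parameter>
# <parameter=units>
# metric
# </parameter>
# </function>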
149 changes: 149 additions & 0 deletions tests/test_lmdeploy/test_qwen3coder_parser.py
@@ -6,6 +6,7 @@
import pytest
import shortuuid

from lmdeploy.model import MODELS
from lmdeploy.serve.openai.api_server import VariableInterface
from lmdeploy.serve.openai.protocol import (
    ChatCompletionRequest,
@@ -261,3 +262,151 @@ def test_no_think_nonstream():
    first_message = resp.choices[0].message
    assert first_message.content == '你好呀!✨ 很高兴见到你!'
    assert first_message.reasoning_content is None


def test_adjust_request_parses_assistant_tool_call_object_arguments():
    parser = Qwen3CoderToolParser(tokenizer=DummyTokenizer())
    request = ChatCompletionRequest(model='qwen3coder',
                                    messages=[{
                                        'role': 'user',
                                        'content': 'hello'
                                    }, {
                                        'role': 'assistant',
                                        'content': '',
                                        'tool_calls': [{
                                            'id': 'call_1',
                                            'type': 'function',
                                            'function': {
                                                'name': 'get_weather',
                                                'arguments': '{"city": "Paris", "units": "metric"}'
                                            }
                                        }]
                                    }])

    adjusted_request = parser.adjust_request(request)

    assert adjusted_request is not request
    assert adjusted_request.messages is not request.messages
    assert adjusted_request.messages[1] is not request.messages[1]
    assert adjusted_request.messages[1]['tool_calls'][0] is not request.messages[1]['tool_calls'][0]
    assert adjusted_request.messages[1]['tool_calls'][0]['function']['arguments'] == {
        'city': 'Paris',
        'units': 'metric'
    }
    assert request.messages[1]['tool_calls'][0]['function']['arguments'] == '{"city": "Paris", "units": "metric"}'


@pytest.mark.parametrize('arguments', ['[1, 2, 3]', '1', '{not valid json}'])
def test_adjust_request_leaves_non_mapping_arguments_unchanged(arguments):
    parser = Qwen3CoderToolParser(tokenizer=DummyTokenizer())
    request = ChatCompletionRequest(model='qwen3coder',
                                    messages=[{
                                        'role': 'assistant',
                                        'content': '',
                                        'tool_calls': [{
                                            'id': 'call_1',
                                            'type': 'function',
                                            'function': {
                                                'name': 'fn',
                                                'arguments': arguments
                                            }
                                        }]
                                    }])

    adjusted_request = parser.adjust_request(request)

    assert adjusted_request is request


def test_adjust_request_noops_for_string_messages():
    parser = Qwen3CoderToolParser(tokenizer=DummyTokenizer())
    request = ChatCompletionRequest(model='qwen3coder', messages='hello')

    adjusted_request = parser.adjust_request(request)

    assert adjusted_request is request


def test_adjust_request_noops_without_assistant_tool_calls():
    parser = Qwen3CoderToolParser(tokenizer=DummyTokenizer())
    request = ChatCompletionRequest(model='qwen3coder',
                                    messages=[{
                                        'role': 'user',
                                        'content': 'hello'
                                    }, {
                                        'role': 'assistant',
                                        'content': 'plain text response'
                                    }, {
                                        'role': 'tool',
                                        'content': '',
                                        'tool_calls': [{
                                            'id': 'call_1',
                                            'type': 'function',
                                            'function': {
                                                'name': 'fn',
                                                'arguments': '{"x": 1}'
                                            }
                                        }]
                                    }])

    adjusted_request = parser.adjust_request(request)

    assert adjusted_request is request


def test_adjust_request_noops_for_dict_arguments():
    parser = Qwen3CoderToolParser(tokenizer=DummyTokenizer())
    request = ChatCompletionRequest(model='qwen3coder',
                                    messages=[{
                                        'role': 'assistant',
                                        'content': '',
                                        'tool_calls': [{
                                            'id': 'call_1',
                                            'type': 'function',
                                            'function': {
                                                'name': 'fn',
                                                'arguments': {
                                                    'x': 1
                                                }
                                            }
                                        }]
                                    }])

    adjusted_request = parser.adjust_request(request)

    assert adjusted_request is request


@pytest.mark.parametrize('model_path', ['Qwen/Qwen3.5-35B-A3B'])
def test_adjust_request_renders_qwen_template_from_string_payload(model_path):
    chat_template = MODELS.get('hf')(model_path)
    parser = Qwen3CoderToolParser(tokenizer=DummyTokenizer())
    request = ChatCompletionRequest(model='qwen3coder',
                                    messages=[{
                                        'role': 'user',
                                        'content': 'What is the weather in Paris?'
                                    }, {
                                        'role': 'assistant',
                                        'content': '',
                                        'tool_calls': [{
                                            'id': 'call_1',
                                            'type': 'function',
                                            'function': {
                                                'name': 'get_weather',
                                                'arguments': '{"city":"Paris","units":"metric"}'
                                            }
                                        }]
                                    }])

    adjusted_request = parser.adjust_request(request)
    prompt = chat_template.messages2prompt(adjusted_request.messages)

    assert adjusted_request is not request
    assert adjusted_request.messages[1]['tool_calls'][0]['function']['arguments'] == {
        'city': 'Paris',
        'units': 'metric'
    }
    assert request.messages[1]['tool_calls'][0]['function']['arguments'] == '{"city":"Paris","units":"metric"}'
    assert '<function=get_weather>' in prompt
    assert '<parameter=city>\nParis\n</parameter>' in prompt
    assert '<parameter=units>\nmetric\n</parameter>' in prompt