Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 41 additions & 13 deletions lmdeploy/serve/parsers/response_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ def _consume_reasoning(self) -> tuple[str | None, bool]:

Behavior:
- Drops the explicit open tag if model emits it.
- Detects tool open tag within reasoning content and switches to MODE_TOOL.
- If no close tag is present, emits only the safe reasoning-text prefix and
preserves possible partial-tag suffix for the next chunk.
- If a close tag is found, emits text before the close tag as reasoning content,
Expand All @@ -350,24 +351,51 @@ def _consume_reasoning(self) -> tuple[str | None, bool]:
self._pending = self._pending[len(open_tag):]
return None, True

# Look for a tool open tag inside the reasoning content. It only takes
# effect when it appears before any reasoning close tag in the buffer.
close_tag = self.profile.reasoning_close_tag
if not close_tag:
raise RuntimeError('Invariant violated: MODE_REASONING requires a reasoning_close_tag.')

idx = self._pending.find(close_tag)
# No close tag found, treat the whole pending text as reasoning content.
if idx < 0:
if not self._pending:
return None, False
out = self._pending
self._pending = ''
return out, True
close_idx = self._pending.find(close_tag)
tool_tag = self.profile.tool_open_tag
tool_idx = self._pending.find(tool_tag) if tool_tag else -1

# If close tag is found, process it first.
if close_idx >= 0:
# Check if tool tag appears before close tag.
Comment on lines +360 to +366
if tool_idx >= 0 and tool_idx < close_idx:
# Tool tag is before reasoning close - emit reasoning before tool, switch to tool mode.
reasoning_chunk = self._pending[:tool_idx] if tool_idx > 0 else None
self._pending = self._pending[tool_idx + len(tool_tag):]
self._mode = self.MODE_TOOL
if self.tool_parser is not None:
self.tool_parser.start_tool_call()
return (reasoning_chunk if reasoning_chunk else None), True

# Reasoning close comes first (or no tool tag).
reasoning_chunk = self._pending[:close_idx]
self._pending = self._pending[close_idx + len(close_tag):]
self._mode = self.MODE_PLAIN
return (reasoning_chunk if reasoning_chunk else None), True

reasoning_chunk = self._pending[:idx]
self._pending = self._pending[idx + len(close_tag):]
# reasoning part is done, switch to plain mode
self._mode = self.MODE_PLAIN
return (reasoning_chunk if reasoning_chunk else None), True
# No close tag found yet. Check for tool open tag.
if tool_idx >= 0:
# Tool tag found while no reasoning close tag is buffered - emit reasoning before tool, switch to tool mode.
reasoning_chunk = self._pending[:tool_idx] if tool_idx > 0 else None
self._pending = self._pending[tool_idx + len(tool_tag):]
self._mode = self.MODE_TOOL
if self.tool_parser is not None:
self.tool_parser.start_tool_call()
return (reasoning_chunk if reasoning_chunk else None), True

# No close tag and no tool tag found - flush the whole pending buffer as
# reasoning content (this branch itself holds nothing back in the buffer).
if not self._pending:
return None, False
out = self._pending
Comment on lines +392 to +396
self._pending = ''
return out, True

def _consume_tool(self) -> tuple[list[DeltaToolCall], bool]:
"""Consume buffered text while in tool mode.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lmdeploy/serve/parsers/test_qwen3_5_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class TestQwen3_5ResponseParserStreaming:

@staticmethod
def _encode_ids(tokenizer, text: str) -> list[int]:
return tokenizer.encode(text, add_bos=False, add_special_tokens=False)
return tokenizer.encode(text, add_special_tokens=False)

def test_stream_chunk_matches_reference(self, tokenizer, response_parser):
"""Feed the real streaming sequence into ResponseParser.stream_chunk
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lmdeploy/serve/parsers/test_qwen3_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ class TestQwenResponseParserStreaming:

@staticmethod
def _encode_ids(tokenizer, text: str) -> list[int]:
return tokenizer.encode(text, add_bos=False, add_special_tokens=False)
return tokenizer.encode(text, add_special_tokens=False)

@pytest.mark.parametrize('reference_chunks', [REFERENCE_CHUNKS_0, REFERENCE_CHUNKS_1, REFERENCE_CHUNKS_2])
def test_stream_chunk_matches_reference(self, tokenizer, response_parser, reference_chunks):
Expand Down
Loading