Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions src/llm/io_processing/gptoss/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
}

// Message appears after channel and constrain, before actual message
if (chunk == openai::Harmony::TOKEN_MESSAGE) {
std::size_t pos = chunk.find(openai::Harmony::TOKEN_MESSAGE);
if (pos != std::string::npos) {
// If previous state was channel, it means constrain was skipped
// We can push function name in case there is some in cache
if (streamState == StreamState::READING_CHANNEL) {
Expand All @@ -119,19 +120,27 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
result = wrapFirstDelta(functionNameCache, toolCallIndex);
}
}

// StreamState::READING_CONSTRAIN implement here if required

streamState = StreamState::READING_MESSAGE;
clearState();
return result;

if (chunk.size() > openai::Harmony::TOKEN_MESSAGE.size()) {
// Move chunk pointer after message tag, continue with everything after message token
chunk = chunk.substr(pos + openai::Harmony::TOKEN_MESSAGE.size());
} else {
// Entire chunk is only message tag, wait for next chunks in next iterations
return result;
}
}

if (endsWith(chunk, openai::Harmony::TOKEN_CALL) || endsWith(chunk, openai::Harmony::TOKEN_END) || endsWith(chunk, openai::Harmony::TOKEN_RETURN)) {
// find last <| and remove from chunk everything after it
std::size_t pos = chunk.rfind("<|");
if (pos != std::string::npos) {
if (pos > 0) {
std::string clearedChunk = chunk.substr(0, pos);
std::size_t tagPos = chunk.rfind("<|");
if (tagPos != std::string::npos) {
if (tagPos > 0) {
std::string clearedChunk = chunk.substr(0, tagPos);
if (!clearedChunk.empty()) {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Streaming | GPT Tool | Sending Argument Part [{}]", clearedChunk);
result = wrapDeltaIntoDocument(clearedChunk);
Expand All @@ -158,9 +167,9 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
// Cut everything after first .
// Remove and take only remaining part
// The harmony format is: <|channel|>commentary to=functions.<function_name> <|constrain|>json<|message|>{...}<|call|>
std::size_t pos = chunk.find('.');
if (pos != std::string::npos) {
chunk = chunk.substr(pos + 1);
std::size_t dotPos = chunk.find('.');
if (dotPos != std::string::npos) {
chunk = chunk.substr(dotPos + 1);
}
}
}
Expand All @@ -169,10 +178,10 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
if (isStreamingFunctionName) {
// Function names dont include space bars.
// We can rely on this fact and simply decide if function name reading phase has finished.
std::size_t pos = chunk.find(' ');
if (pos != std::string::npos) {
std::size_t spacePos = chunk.find(' ');
if (spacePos != std::string::npos) {
isStreamingFunctionName = false;
chunk = chunk.substr(0, pos);
chunk = chunk.substr(0, spacePos);
cache.clear();
}

Expand All @@ -182,9 +191,20 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
}
break;
}
case StreamState::READING_CONSTRAIN:
// Ignored, not needed for end user
case StreamState::READING_CONSTRAIN: {
// Ignore up to <|message|>
std::size_t msgPos = chunk.find(openai::Harmony::TOKEN_MESSAGE);
if (msgPos != std::string::npos) {
// ignore only up to message
chunk = chunk.substr(msgPos + openai::Harmony::TOKEN_MESSAGE.size());
streamState = StreamState::READING_MESSAGE;
clearState();
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Streaming | GPT Tool | Sending Argument Part [{}]", chunk);
return wrapDeltaIntoDocument(chunk);
}

break;
}
case StreamState::READING_MESSAGE: {
SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Streaming | GPT Tool | Sending Argument Part [{}]", chunk);
return wrapDeltaIntoDocument(chunk);
Expand Down
59 changes: 59 additions & 0 deletions src/test/llm/output_parsers/gptoss_output_parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,3 +578,62 @@ TEST_F(GptOssOutputStreamParserTest, HolisticStreamingTools) {
};
test(chunkToDeltaVec);
}

TEST_F(GptOssOutputStreamParserTest, ConstrainAndCallGluedWithToolCall) {
std::vector<std::tuple<std::string, ov::genai::GenerationFinishReason, std::optional<std::string>>> chunkToDeltaVec{
// Reasoning
{"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"analysis", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<|message|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"I", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":"I"}})"}},
{" will", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":" will"}})"}},
{" call", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":" call"}})"}},
{" fun", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":" fun"}})"}},
{"ction.", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":"ction."}})"}},
{"<|end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},

// Tool 1 (with ignored constrain, but it is glued to tool call)
{"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"commentary", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{" to=", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"fun", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"ctions", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{".world ", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<|constrain|>", ov::genai::GenerationFinishReason::NONE, "{\"delta\":{\"tool_calls\":[{\"id\":\"XXXXXXXXX\",\"type\":\"function\",\"index\":0,\"function\":{\"name\":\"world\"}}]}}"},
{R"(json<|message|>{
)",
ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\n"}}]}})"},
{" ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" "}}]}})"},
{" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \""}}]}})"},
{"location", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]}})"},
{"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":"}}]}})"},
{" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \""}}]}})"},
{"Warsaw", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Warsaw"}}]}})"},
{"\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\""}}]}})"},
{"}<|call|>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"}"}}]}})"},

// Tool 2 empty msg
{"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"commentary", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{" to=", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"fun", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"ctions", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{".world ", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<|constrain|>", ov::genai::GenerationFinishReason::NONE, "{\"delta\":{\"tool_calls\":[{\"id\":\"XXXXXXXXX\",\"type\":\"function\",\"index\":1,\"function\":{\"name\":\"world\"}}]}}"},
{"json", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<|message|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"{}<|call|>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"{}"}}]}})"},

// Tool 3 empty msg
{"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"commentary", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{" to=", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"fun", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"ctions", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{".world ", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<|constrain|>", ov::genai::GenerationFinishReason::NONE, "{\"delta\":{\"tool_calls\":[{\"id\":\"XXXXXXXXX\",\"type\":\"function\",\"index\":2,\"function\":{\"name\":\"world\"}}]}}"},
{"json", ov::genai::GenerationFinishReason::NONE, std::nullopt},
{"<|message|>{}<|call|>", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":2,"function":{"arguments":"{}"}}]}})"},
};
test(chunkToDeltaVec);
}