Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,21 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
tool_start = format.per_call_start;
}

return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(p.until(tool_start)))) + tools_parser +
p.end();
if (force_tools) {
return ctx.reasoning_parser + tools_parser + p.end();
}

// For templates like Llama 3.3 where tool_start is "{" (no distinctive marker),
// content stops at any brace and tools_parser takes over. If the model output
// contains JSON that isn't a valid tool call (e.g. user asked for a JSON schema),
// the tools parser fails with nothing to absorb the remaining input.
//
// Without the fallback, output containing braces is either silently truncated
// (partial match) or causes an HTTP 500 (full parse throws). The content-only
// branch lets the parser return the entire output as content when tool parsing fails.
auto with_tools = ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
auto content_only = ctx.reasoning_parser + p.content(p.rest()) + p.end();
return p.choice({with_tools, content_only});
}

common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
Expand Down
12 changes: 12 additions & 0 deletions tests/test-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,18 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
GGML_ASSERT(got_runtime_error && "throw path should produce std::runtime_error with parse position");
}

// Regression: Llama 3.3 tool_start="{" — model output that looks like a tool
// call but isn't must not crash. Repro: Llama-3.2-3B temp=0 tools=[get_weather]
// prompt="Write a hello world C program. Just the code, no explanation."
// model outputs: {"name": "main", "parameters": {"argc": "2", ...}}
{
auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", false);
tst.test("{\"name\": \"main\", \"parameters\": {\"argc\": \"2\"}}")
.tools({ special_function_tool })
.expect(simple_assist_msg("{\"name\": \"main\", \"parameters\": {\"argc\": \"2\"}}"))
.run();
}

// Kimi-K2-Thinking tests - custom parser
// Unique feature: tool call ID embeds function name as functions.<name>:<counter>
{
Expand Down