From cb9a7d64e026057c0e978d01d3b9fd9c42a2e523 Mon Sep 17 00:00:00 2001 From: James O'Leary Date: Thu, 19 Mar 2026 23:58:10 -0700 Subject: [PATCH] chat : add content-only fallback for JSON_NATIVE tool parser For templates like Llama 3.3 where tool_start is "{" (no distinctive marker), the content parser stops at any brace and the tools parser takes over. If the model output contains braces that aren't valid tool calls, the tools parser fails with nothing to absorb the remaining input. Regression introduced in 566059a26 (Autoparser #18675, 2026-03-06). Two failure modes on current master: - Content silently truncated at first "{" (partial match) - HTTP 500 crash (full parse throws) Fix: wrap the existing parser in a choice() with a content-only fallback. The tools path is tried first; when it fails, the fallback returns everything as content. No behavior change for valid tool calls. Unit test: cmake -B build -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_TOOLS=OFF cmake --build build --target test-chat ./build/bin/test-chat Server repro (Llama 3.2 3B, temp=0, tools enabled): llama-server -m Llama-3.2-3B-Instruct-Q4_K_M.gguf --jinja # 200 before 566059a26, 500 after curl http://localhost:8080/v1/chat/completions -d '{ "messages": [{"role": "user", "content": "Write a hello world C program. 
Just the code, no explanation."}], "tools": [{"type": "function", "function": { "name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]} }}], "temperature": 0, "max_tokens": 200 }' --- common/chat-auto-parser-generator.cpp | 17 +++++++++++++++-- tests/test-chat.cpp | 12 ++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index aa03aea5a91..11744f4c441 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -208,8 +208,21 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont tool_start = format.per_call_start; } - return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(p.until(tool_start)))) + tools_parser + - p.end(); + if (force_tools) { + return ctx.reasoning_parser + tools_parser + p.end(); + } + + // For templates like Llama 3.3 where tool_start is "{" (no distinctive marker), + // content stops at any brace and tools_parser takes over. If the model output + // contains JSON that isn't a valid tool call (e.g. user asked for a JSON schema), + // the tools parser fails with nothing to absorb the remaining input. + // + // Without the fallback: braces in content either silently truncate (partial match) + // or crash with HTTP 500 (full parse throws). The content-only branch lets the + // parser return the entire output as content when tool parsing fails. 
+ auto with_tools = ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end(); + auto content_only = ctx.reasoning_parser + p.content(p.rest()) + p.end(); + return p.choice({with_tools, content_only}); } common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const { diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index faac9e73062..035e2e83062 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -2009,6 +2009,18 @@ static void test_template_output_peg_parsers(bool detailed_debug) { GGML_ASSERT(got_runtime_error && "throw path should produce std::runtime_error with parse position"); } + // Regression: Llama 3.3 tool_start="{" — model output that looks like a tool + // call but isn't must not crash. Repro: Llama-3.2-3B temp=0 tools=[get_weather] + // prompt="Write a hello world C program. Just the code, no explanation." + // model outputs: {"name": "main", "parameters": {"argc": "2", ...}} + { + auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", false); + tst.test("{\"name\": \"main\", \"parameters\": {\"argc\": \"2\"}}") + .tools({ special_function_tool }) + .expect(simple_assist_msg("{\"name\": \"main\", \"parameters\": {\"argc\": \"2\"}}")) + .run(); + } + // Kimi-K2-Thinking tests - custom parser // Unique feature: tool call ID embeds function name as functions.<name>:<counter> {