From 451027a37886a75b28afc9245d30b01e8725da62 Mon Sep 17 00:00:00 2001 From: James O'Leary Date: Fri, 20 Mar 2026 08:16:07 -0700 Subject: [PATCH] chat : add EOS token to additional_stops for autoparser templates Some models emit the EOS token as text (e.g. </s>) rather than as the special EOS token ID. The PEG parser fails at end-of-input because the trailing EOS text isn't consumed. Regression introduced in 566059a26 (Autoparser #18675, 2026-03-06). Fix: add the template's EOS token to additional_stops so the server strips it before the output reaches the parser. Unit test: cmake -B build -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_TOOLS=OFF cmake --build build --target test-chat ./build/bin/test-chat Server repro (bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF, temp=0): llama-server -m Mistral-Small-3.2-24B-Instruct-2506-IQ2_M.gguf --jinja # 200 before 566059a26, 500 after curl http://localhost:8080/v1/chat/completions -d '{ "messages": [{"role": "user", "content": "Weather in Tokyo?"}], "tools": [{"type": "function", "function": { "name": "get_weather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]} }}], "temperature": 0, "max_tokens": 200 }' --- common/chat-auto-parser-generator.cpp | 8 ++++++++ tests/test-chat.cpp | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index aa03aea5a91..93a464a824a 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -46,6 +46,14 @@ common_chat_params peg_generator::generate_parser(const common_chat_template & data.format = COMMON_CHAT_FORMAT_PEG_NATIVE; data.preserved_tokens = autoparser.preserved_tokens; + // Some models emit the EOS token as text (e.g. </s>) rather than as the + // special EOS token ID. Add it as a stop so the server strips it before + // the parser sees it. 
Without this, the trailing EOS text causes the + // parser to fail at end-of-input. + if (!tmpl.eos_token().empty()) { + data.additional_stops.push_back(tmpl.eos_token()); + } + auto parser = autoparser.build_parser(inputs); data.parser = parser.save(); diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index faac9e73062..86d299769d0 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -2502,6 +2502,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) { .tools({ special_function_tool }) .expect(message_assist_call_id) .run(); + // Regression: real Mistral Small 3.2 appends </s> after tool calls. + // Repro: Mistral-Small-3.2-24B-Instruct-2506 temp=0 + // prompt="Weather in Tokyo?" tools=[get_weather] + { + auto tmpls2 = common_chat_templates_ptr( + common_chat_templates_init(nullptr, read_file("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja"), + /*bos=*/"", /*eos=*/"")); + common_chat_templates_inputs inputs2; + inputs2.tools = { special_function_tool }; + inputs2.add_generation_prompt = true; + inputs2.use_jinja = true; + inputs2.messages = {{"user", "hi"}}; + auto params2 = common_chat_templates_apply(tmpls2.get(), inputs2); + bool has_eos_stop = false; + for (const auto & s : params2.additional_stops) { + if (s == "") { has_eos_stop = true; break; } + } + GGML_ASSERT(has_eos_stop && "autoparser must add EOS to additional_stops"); + } } // Devstral {