diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index aa03aea5a91..93a464a824a 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -46,6 +46,14 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
     data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.preserved_tokens = autoparser.preserved_tokens;
 
+    // Some models emit the EOS token as text (e.g. </s>) rather than as the
+    // special EOS token ID. Add it as a stop so the server strips it before
+    // the parser sees it. Without this, the trailing EOS text causes the
+    // parser to fail at end-of-input.
+    if (!tmpl.eos_token().empty()) {
+        data.additional_stops.push_back(tmpl.eos_token());
+    }
+
     auto parser = autoparser.build_parser(inputs);
     data.parser = parser.save();
 
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index faac9e73062..86d299769d0 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -2502,6 +2502,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             .tools({ special_function_tool })
             .expect(message_assist_call_id)
             .run();
+        // Regression: real Mistral Small 3.2 appends </s> after tool calls.
+        // Repro: Mistral-Small-3.2-24B-Instruct-2506 temp=0
+        // prompt="Weather in Tokyo?" tools=[get_weather]
+        {
+            auto tmpls2 = common_chat_templates_ptr(
+                common_chat_templates_init(nullptr, read_file("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja"),
+                                           /*bos=*/"", /*eos=*/"</s>"));
+            common_chat_templates_inputs inputs2;
+            inputs2.tools = { special_function_tool };
+            inputs2.add_generation_prompt = true;
+            inputs2.use_jinja = true;
+            inputs2.messages = {{"user", "hi"}};
+            auto params2 = common_chat_templates_apply(tmpls2.get(), inputs2);
+            bool has_eos_stop = false;
+            for (const auto & s : params2.additional_stops) {
+                if (s == "</s>") { has_eos_stop = true; break; }
+            }
+            GGML_ASSERT(has_eos_stop && "autoparser must add EOS to additional_stops");
+        }
     }
     // Devstral
     {