From 451027a37886a75b28afc9245d30b01e8725da62 Mon Sep 17 00:00:00 2001
From: James O'Leary <jpohhhh@gmail.com>
Date: Fri, 20 Mar 2026 08:16:07 -0700
Subject: [PATCH] chat : add EOS token to additional_stops for autoparser
 templates

Some models emit the EOS token as text (e.g. </s>) rather than as
the special EOS token ID. The PEG parser fails at end-of-input because
the trailing EOS text isn't consumed.

Regression introduced in 566059a26 (Autoparser #18675, 2026-03-06).

Fix: add the template's EOS token to additional_stops so the server
strips it before the output reaches the parser.

Unit test:

  cmake -B build -DLLAMA_BUILD_TESTS=ON -DLLAMA_BUILD_TOOLS=OFF
  cmake --build build --target test-chat
  ./build/bin/test-chat

Server repro (bartowski/mistralai_Mistral-Small-3.2-24B-Instruct-2506-GGUF, temp=0):

  llama-server -m Mistral-Small-3.2-24B-Instruct-2506-IQ2_M.gguf --jinja

  # responds with HTTP 200 before 566059a26, HTTP 500 after
  curl http://localhost:8080/v1/chat/completions -d '{
    "messages": [{"role": "user", "content": "Weather in Tokyo?"}],
    "tools": [{"type": "function", "function": {
      "name": "get_weather", "description": "Get weather",
      "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}
    }}],
    "temperature": 0, "max_tokens": 200
  }'
---
 common/chat-auto-parser-generator.cpp |  8 ++++++++
 tests/test-chat.cpp                   | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+)
diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index aa03aea5a91..93a464a824a 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -46,6 +46,14 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
     data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
     data.preserved_tokens = autoparser.preserved_tokens;
 
+    // Some models emit the EOS token as text (e.g. </s>) rather than as the
+    // special EOS token ID. Add it as a stop so the server strips it before
+    // the parser sees it. Without this, the trailing EOS text causes the
+    // parser to fail at end-of-input.
+    if (!tmpl.eos_token().empty()) {
+        data.additional_stops.push_back(tmpl.eos_token());
+    }
+
     auto parser = autoparser.build_parser(inputs);
     data.parser = parser.save();
 
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index faac9e73062..86d299769d0 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -2502,6 +2502,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
             .tools({ special_function_tool })
             .expect(message_assist_call_id)
             .run();
+        // Regression: real Mistral Small 3.2 emits </s> as text after tool calls
+        // (repro: Mistral-Small-3.2-24B-Instruct-2506, temp=0, prompt="Weather in Tokyo?",
+        // tools=[get_weather]). Verify the EOS text is registered in additional_stops.
+        {
+            auto tmpls2 = common_chat_templates_ptr(
+                common_chat_templates_init(nullptr, read_file("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja"),
+                    /*bos=*/"", /*eos=*/"</s>"));
+            common_chat_templates_inputs inputs2;
+            inputs2.tools = { special_function_tool };
+            inputs2.add_generation_prompt = true;
+            inputs2.use_jinja = true;
+            inputs2.messages = {{"user", "hi"}};
+            auto params2 = common_chat_templates_apply(tmpls2.get(), inputs2);
+            bool has_eos_stop = false;
+            for (const auto & s : params2.additional_stops) {
+                if (s == "</s>") { has_eos_stop = true; break; }
+            }
+            GGML_ASSERT(has_eos_stop && "autoparser must add EOS to additional_stops");
+        }
     }
     // Devstral
     {