From d773167a39bfdb2d51ae386b9c34d5d734ef01f8 Mon Sep 17 00:00:00 2001 From: RivetOS Claude Date: Wed, 27 May 2026 12:33:32 +0000 Subject: [PATCH] [Bugfix] Default Qwen3 reasoning parser to prompt-has-open-think The standard Qwen3 chat template injects <think>\n into the assistant turn opener whenever enable_thinking is not explicitly False. That means completion tokens only contain </think> followed by the answer, never an opening <think>. The current parser default (prompt_has_open_think=False) means clients must explicitly pass chat_template_kwargs={"enable_thinking": True} in every request, otherwise the parser sees no opening <think>, falls through to the fully-tagged branch, and dumps the entire completion (reasoning + close-tag + answer) into the content field with reasoning left null. Flip the default to True so the parser matches the template's actual behavior out of the box. An explicit enable_thinking=False still correctly disables open-think handling: the template injects a closed <think></think> pair in that case, so neither token appears in the completion and the no-prompt-open-think branch is the right code path. Verified live on V100 TP=2 with Qwen3-based model: without the fix, reasoning=null and 700+ chars of CoT leaked into content; with the fix, reasoning correctly contains the CoT and content is the clean answer, all without any client-side chat_template_kwargs. 
Co-Authored-By: RivetOS Claude --- vllm/reasoning/qwen3_reasoning_parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py index c3961fa962..5fb59362e5 100644 --- a/vllm/reasoning/qwen3_reasoning_parser.py +++ b/vllm/reasoning/qwen3_reasoning_parser.py @@ -27,9 +27,11 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser): def __init__(self, tokenizer, *args, **kwargs): super().__init__(tokenizer, *args, **kwargs) chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {} - # Qwen3.5 chat templates open the <think> block in the prompt when - # thinking is enabled, so completion tokens may only contain </think>. - self.prompt_has_open_think = bool(chat_kwargs.get("enable_thinking", False)) + # The standard Qwen3 chat template injects <think>\n into the prompt + # whenever enable_thinking is not explicitly False, so completion tokens + # may only contain </think>. Default to True and only honor an explicit + # enable_thinking=False to opt out. + self.prompt_has_open_think = bool(chat_kwargs.get("enable_thinking", True)) @property def start_token(self) -> str: