From d773167a39bfdb2d51ae386b9c34d5d734ef01f8 Mon Sep 17 00:00:00 2001
From: RivetOS Claude <noreply@rivetos.dev>
Date: Wed, 27 May 2026 12:33:32 +0000
Subject: [PATCH] [Bugfix] Default Qwen3 reasoning parser to
 prompt-has-open-think

The standard Qwen3 chat template injects <think>\n into the assistant
turn opener whenever enable_thinking is not explicitly False. That means
the completion contains the reasoning text, a closing </think>, and then
the answer, but never an opening <think>.

With the current parser default (prompt_has_open_think=False), clients
must explicitly pass chat_template_kwargs={"enable_thinking": True} in
every request. Otherwise the parser sees no opening <think>, falls
through to the fully-tagged branch, and dumps the entire completion
(reasoning + close-tag + answer) into the content field with reasoning
left null.

Flip the default to True so the parser matches the template's actual
behavior out of the box. An explicit enable_thinking=False still
correctly disables open-think handling: the template injects a closed
<think></think> pair in that case, so neither token appears in the
completion and the no-prompt-open-think branch is the right code path.

Verified live on V100 TP=2 with a Qwen3-based model: without the fix,
reasoning=null and 700+ chars of CoT leaked into content; with the fix,
reasoning correctly contains the CoT and content is the clean answer,
all without any client-side chat_template_kwargs.

Co-Authored-By: RivetOS Claude <noreply@rivetos.dev>
---
 vllm/reasoning/qwen3_reasoning_parser.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py
index c3961fa962..5fb59362e5 100644
--- a/vllm/reasoning/qwen3_reasoning_parser.py
+++ b/vllm/reasoning/qwen3_reasoning_parser.py
@@ -27,9 +27,11 @@ class Qwen3ReasoningParser(BaseThinkingReasoningParser):
     def __init__(self, tokenizer, *args, **kwargs):
         super().__init__(tokenizer, *args, **kwargs)
         chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {}
-        # Qwen3.5 chat templates open the <think> block in the prompt when
-        # thinking is enabled, so completion tokens may only contain </think>.
-        self.prompt_has_open_think = bool(chat_kwargs.get("enable_thinking", False))
+        # The standard Qwen3 chat template injects <think>\n into the prompt
+        # whenever enable_thinking is not explicitly False, so completion tokens
+        # may contain only a closing </think>. Default to True when the key is
+        # absent; any falsy enable_thinking value (False, None, 0) opts out.
+        self.prompt_has_open_think = bool(chat_kwargs.get("enable_thinking", True))
 
     @property
     def start_token(self) -> str: