From c16672a4613d3e2550758ad3630e5ed79da8301b Mon Sep 17 00:00:00 2001
From: Ben Browning <bbrownin@redhat.com>
Date: Fri, 17 Apr 2026 11:46:29 -0400
Subject: [PATCH] Fix encoding of channel, recipient, and constrain

We're seeing a lot of failures with gpt-oss-20b and gpt-oss-120b models
in vLLM where we get <|channel|> markers leaking into tool call names
and the model outputting reasoning after <|constrain|> tokens in very
long multi-turn conversations.

These are fairly trivial to reproduce with gpt-oss-20b using BFCL
multi_turn_base as a simple eval, and occasionally show up with
gpt-oss-120b as well. In SWE Bench Verified with gpt-oss-120b, this
shows up a lot more.

Here's an example of the kind of errors we see across 500 tests cases
with SWE Bench Verified and gpt-oss-120b:

```
  HarmonyErrors (special token leaks in message parsing):
      122x  Unknown role: assistant<|channel|>commentary
       25x  Unknown role: assistant<|channel|>bash<|channel|>commentary
       24x  unexpected tokens remaining in message header: Some("to=functions.bash")
        3x  Unknown role: final
        2x  Unknown role: assistant<|channel|>bash<|channel|>analysis
        2x  unexpected tokens remaining in message header: Some("<|constrain|>commentary")
        2x  Unknown role: assistant<|channel|>analysis
        1x  unexpected tokens remaining in message header: Some("<|constrain|>control<|end|><|start|>assistant<|channel|>commentary"
        1x  unexpected tokens remaining in message header: Some("bash")
        1x  unexpected tokens remaining in message header: Some("to=functions.bash # We'll create conftest.py with needed code<|end|
        1x  unexpected tokens remaining in message header: Some("to=functions.bash (We attempt to")
        1x  unexpected tokens remaining in message header: Some("<|constrain|>Complete_TASK_AND_SUBMIT_FINAL_OUTPUT &&")
        1x  could not decode header
        1x  Unknown role: assistant<|channel|>bash<|channel|>commentary,json
        1x  unexpected tokens remaining in message header: Some("to=functions.bash\"I'm thinking the repository is not trusted. We'l
        1x  unexpected tokens remaining in message header: Some("to=functions.bash by using git checkout...<|end|><|start|>assistant
        1x  unexpected tokens remaining in message header: Some("to=functions.bash as<|end|><|start|>assistant<|channel|>commentary"
```

Through experimentation and closely monitoring what the models output
versus the tokens we send back in as input, I discovered two important
things.

First, for tool calls output to the commentary channel, the models
always output the recipient and channel order backwards from what
encoding.rs was doing. So, this swaps the order of the channel and
recipient when outputting tool calls to the commentary channel to match
what the model does, which presumably is a closer match to the training
data. In anecdotal testing this seems to greatly reduce if not eliminate
all the extra <|channel|> markers we were seeing in the model output.
So, this change may obviate the need for #97.

Second, we caught examples of the model emitting reasoning text after a
<|constrain|> token in long multi-turn conversations. A real-world
example of that we saw: `<|start|>assistant<|channel|>commentary
to=functions.echo<|constrain|>2.0? Actually we need to write content.
Use echo.<|end|><|start|>assistant<|channel|>commentary
to=functions.echo<|constrain|>json<|message|>{"content": "2.0000",
"file_name": "result.txt"}<|call|>`. To fix this, instead of ever
passing in a bare content-type (such as `json`), this PR always emits
the `<|constrain|>` special token before that content type. In long
multi-turn scenarios this appears to solve the problem of the model
incorrectly reasoning after that token. This is perhaps also partially a
side-effect of roundtripping this model's outputs via APIs that have no
provision to include a "content type" that the client can send back in,
so we loose the details of whether the model output a `json` or
`<|constrain|>json` and have to reconstruct as best we can on the
inference side.

We'll do some more testing on our side, but I wanted to go ahead and
open this PR to get some eyes on the issue and any feedback about these
fixes.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
---
 src/encoding.rs                               | 37 +++++++++++++------
 ...test_does_not_drop_if_ongoing_analysis.txt |  2 +-
 test-data/test_tool_response_parsing.txt      |  2 +-
 tests/test_harmony.py                         |  2 +-
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/src/encoding.rs b/src/encoding.rs
index 60257e7..d255d06 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -838,17 +838,30 @@ impl Render<Message> for HarmonyEncoding {
             }
         };
 
-        // next render the header recipient, if there is one
-        if let Some(recipient) = &message.recipient {
-            if recipient != "all" {
-                self.render_text_into(format!(" to={recipient}"), into)?;
+        // Header channel vs recipient order: commentary uses <|channel|>… before to=…;
+        // other channels keep to=… before <|channel|>… (legacy order).
+        let commentary_channel_first = message.channel.as_deref() == Some("commentary");
+
+        if commentary_channel_first {
+            if let Some(channel) = &message.channel {
+                self.render_formatting_token_into(FormattingToken::Channel, into)?;
+                self.render_text_into(channel, into)?;
+            }
+            if let Some(recipient) = &message.recipient {
+                if recipient != "all" {
+                    self.render_text_into(format!(" to={recipient}"), into)?;
+                }
+            }
+        } else {
+            if let Some(recipient) = &message.recipient {
+                if recipient != "all" {
+                    self.render_text_into(format!(" to={recipient}"), into)?;
+                }
+            }
+            if let Some(channel) = &message.channel {
+                self.render_formatting_token_into(FormattingToken::Channel, into)?;
+                self.render_text_into(channel, into)?;
             }
-        }
-
-        // next header channel
-        if let Some(channel) = &message.channel {
-            self.render_formatting_token_into(FormattingToken::Channel, into)?;
-            self.render_text_into(channel, into)?;
         }
 
         // finally content type
@@ -865,7 +878,9 @@ impl Render<Message> for HarmonyEncoding {
                         self.render_text_into(rest, into)?;
                     }
                 } else {
-                    self.render_text_into(format!(" {content_type}"), into)?;
+                    self.render_text_into(" ", into)?;
+                    self.render_formatting_token_into(FormattingToken::ConstrainedFormat, into)?;
+                    self.render_text_into(content_type, into)?;
                 }
             } else {
                 self.render_text_into(format!(" {content_type}"), into)?;
diff --git a/test-data/test_does_not_drop_if_ongoing_analysis.txt b/test-data/test_does_not_drop_if_ongoing_analysis.txt
index 29f8519..b837442 100644
--- a/test-data/test_does_not_drop_if_ongoing_analysis.txt
+++ b/test-data/test_does_not_drop_if_ongoing_analysis.txt
@@ -1 +1 @@
-<|start|>user<|message|>What is the weather in SF?<|end|><|start|>assistant<|channel|>analysis<|message|>User asks: “What is the weather in SF?” We need to use lookup_weather tool.<|end|><|start|>assistant to=functions.lookup_weather<|channel|>commentary <|constrain|>json<|message|>{"location": "San Francisco"}<|call|><|start|>functions.lookup_weather<|message|>{"temperature": 20, "description": "sunny"}<|end|><|start|>assistant
\ No newline at end of file
+<|start|>user<|message|>What is the weather in SF?<|end|><|start|>assistant<|channel|>analysis<|message|>User asks: “What is the weather in SF?” We need to use lookup_weather tool.<|end|><|start|>assistant<|channel|>commentary to=functions.lookup_weather <|constrain|>json<|message|>{"location": "San Francisco"}<|call|><|start|>functions.lookup_weather<|message|>{"temperature": 20, "description": "sunny"}<|end|><|start|>assistant
diff --git a/test-data/test_tool_response_parsing.txt b/test-data/test_tool_response_parsing.txt
index a1d4b87..a958572 100644
--- a/test-data/test_tool_response_parsing.txt
+++ b/test-data/test_tool_response_parsing.txt
@@ -1 +1 @@
-<|start|>browser.search to=assistant<|channel|>commentary<|message|>{"result": "https://openai.com/"} 
\ No newline at end of file
+<|start|>browser.search<|channel|>commentary to=assistant<|message|>{"result": "https://openai.com/"} 
diff --git a/tests/test_harmony.py b/tests/test_harmony.py
index dbb9925..688621a 100644
--- a/tests/test_harmony.py
+++ b/tests/test_harmony.py
@@ -245,7 +245,7 @@ def test_tool_call_with_constrain_tokenized_correctly(encoding_name):
     """
     encoding = load_harmony_encoding(encoding_name)
     text = (
-        "<|start|>assistant to=functions.get_weather<|channel|>commentary"
+        "<|start|>assistant<|channel|>commentary to=functions.get_weather"
         ' <|constrain|>json<|message|>{"location": "Tokyo"}<|call|>'
     )
     tokens = encoding.encode(text, allowed_special="all")