From c16672a4613d3e2550758ad3630e5ed79da8301b Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Fri, 17 Apr 2026 11:46:29 -0400 Subject: [PATCH] Fix encoding of channel, recipient, and constrain We're seeing a lot of failures with gpt-oss-20b and gpt-oss-120b models in vLLM where we get <|channel|> markers leaking into tool call names and the model outputting reasoning after <|constrain|> tokens in very long multi-turn conversations. These are fairly trivial to reproduce with gpt-oss-20b using BFCL multi_turn_base as a simple eval, and occasionally show up with gpt-oss-120b as well. In SWE Bench Verified with gpt-oss-120b, this shows up a lot more. Here's an example of the kind of errors we see across 500 tests cases with SWE Bench Verified and gpt-oss-120b: ``` HarmonyErrors (special token leaks in message parsing): 122x Unknown role: assistant<|channel|>commentary 25x Unknown role: assistant<|channel|>bash<|channel|>commentary 24x unexpected tokens remaining in message header: Some("to=functions.bash") 3x Unknown role: final 2x Unknown role: assistant<|channel|>bash<|channel|>analysis 2x unexpected tokens remaining in message header: Some("<|constrain|>commentary") 2x Unknown role: assistant<|channel|>analysis 1x unexpected tokens remaining in message header: Some("<|constrain|>control<|end|><|start|>assistant<|channel|>commentary" 1x unexpected tokens remaining in message header: Some("bash") 1x unexpected tokens remaining in message header: Some("to=functions.bash # We'll create conftest.py with needed code<|end| 1x unexpected tokens remaining in message header: Some("to=functions.bash (We attempt to") 1x unexpected tokens remaining in message header: Some("<|constrain|>Complete_TASK_AND_SUBMIT_FINAL_OUTPUT &&") 1x could not decode header 1x Unknown role: assistant<|channel|>bash<|channel|>commentary,json 1x unexpected tokens remaining in message header: Some("to=functions.bash\"I'm thinking the repository is not trusted. We'l 1x unexpected tokens remaining in message header: Some("to=functions.bash by using git checkout...<|end|><|start|>assistant 1x unexpected tokens remaining in message header: Some("to=functions.bash as<|end|><|start|>assistant<|channel|>commentary" ``` Through experimentation and closely monitoring what the models output versus the tokens we send back in as input, I discovered two important things. First, for tool calls output to the commentary channel, the models always output the recipient and channel order backwards from what encoding.rs was doing. So, this swaps the order of the channel and recipient when outputting tool calls to the commentary channel to match what the model does, which presumably is a closer match to the training data. In anecdotal testing this seems to greatly reduce if not eliminate all the extra <|channel|> markers we were seeing in the model output. So, this change may obviate the need for #97. Second, we caught examples of the model emitting reasoning text after a <|constrain|> token in long multi-turn conversations. A real-world example of that we saw: `<|start|>assistant<|channel|>commentary to=functions.echo<|constrain|>2.0? Actually we need to write content. Use echo.<|end|><|start|>assistant<|channel|>commentary to=functions.echo<|constrain|>json<|message|>{"content": "2.0000", "file_name": "result.txt"}<|call|>`. To fix this, instead of ever passing in a bare content-type (such as `json`), this PR always emits the `<|constrain|>` special token before that content type. In long multi-turn scenarios this appears to solve the problem of the model incorrectly reasoning after that token. This is perhaps also partially a side-effect of roundtripping this model's outputs via APIs that have no provision to include a "content type" that the client can send back in, so we loose the details of whether the model output a `json` or `<|constrain|>json` and have to reconstruct as best we can on the inference side. We'll do some more testing on our side, but I wanted to go ahead and open this PR to get some eyes on the issue and any feedback about these fixes. Signed-off-by: Ben Browning --- src/encoding.rs | 37 +++++++++++++------ ...test_does_not_drop_if_ongoing_analysis.txt | 2 +- test-data/test_tool_response_parsing.txt | 2 +- tests/test_harmony.py | 2 +- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index 60257e7..d255d06 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -838,17 +838,30 @@ impl Render for HarmonyEncoding { } }; - // next render the header recipient, if there is one - if let Some(recipient) = &message.recipient { - if recipient != "all" { - self.render_text_into(format!(" to={recipient}"), into)?; + // Header channel vs recipient order: commentary uses <|channel|>… before to=…; + // other channels keep to=… before <|channel|>… (legacy order). + let commentary_channel_first = message.channel.as_deref() == Some("commentary"); + + if commentary_channel_first { + if let Some(channel) = &message.channel { + self.render_formatting_token_into(FormattingToken::Channel, into)?; + self.render_text_into(channel, into)?; + } + if let Some(recipient) = &message.recipient { + if recipient != "all" { + self.render_text_into(format!(" to={recipient}"), into)?; + } + } + } else { + if let Some(recipient) = &message.recipient { + if recipient != "all" { + self.render_text_into(format!(" to={recipient}"), into)?; + } + } + if let Some(channel) = &message.channel { + self.render_formatting_token_into(FormattingToken::Channel, into)?; + self.render_text_into(channel, into)?; } - } - - // next header channel - if let Some(channel) = &message.channel { - self.render_formatting_token_into(FormattingToken::Channel, into)?; - self.render_text_into(channel, into)?; } // finally content type @@ -865,7 +878,9 @@ impl Render for HarmonyEncoding { self.render_text_into(rest, into)?; } } else { - self.render_text_into(format!(" {content_type}"), into)?; + self.render_text_into(" ", into)?; + self.render_formatting_token_into(FormattingToken::ConstrainedFormat, into)?; + self.render_text_into(content_type, into)?; } } else { self.render_text_into(format!(" {content_type}"), into)?; diff --git a/test-data/test_does_not_drop_if_ongoing_analysis.txt b/test-data/test_does_not_drop_if_ongoing_analysis.txt index 29f8519..b837442 100644 --- a/test-data/test_does_not_drop_if_ongoing_analysis.txt +++ b/test-data/test_does_not_drop_if_ongoing_analysis.txt @@ -1 +1 @@ -<|start|>user<|message|>What is the weather in SF?<|end|><|start|>assistant<|channel|>analysis<|message|>User asks: “What is the weather in SF?” We need to use lookup_weather tool.<|end|><|start|>assistant to=functions.lookup_weather<|channel|>commentary <|constrain|>json<|message|>{"location": "San Francisco"}<|call|><|start|>functions.lookup_weather<|message|>{"temperature": 20, "description": "sunny"}<|end|><|start|>assistant \ No newline at end of file +<|start|>user<|message|>What is the weather in SF?<|end|><|start|>assistant<|channel|>analysis<|message|>User asks: “What is the weather in SF?” We need to use lookup_weather tool.<|end|><|start|>assistant<|channel|>commentary to=functions.lookup_weather <|constrain|>json<|message|>{"location": "San Francisco"}<|call|><|start|>functions.lookup_weather<|message|>{"temperature": 20, "description": "sunny"}<|end|><|start|>assistant diff --git a/test-data/test_tool_response_parsing.txt b/test-data/test_tool_response_parsing.txt index a1d4b87..a958572 100644 --- a/test-data/test_tool_response_parsing.txt +++ b/test-data/test_tool_response_parsing.txt @@ -1 +1 @@ -<|start|>browser.search to=assistant<|channel|>commentary<|message|>{"result": "https://openai.com/"} \ No newline at end of file +<|start|>browser.search<|channel|>commentary to=assistant<|message|>{"result": "https://openai.com/"} diff --git a/tests/test_harmony.py b/tests/test_harmony.py index dbb9925..688621a 100644 --- a/tests/test_harmony.py +++ b/tests/test_harmony.py @@ -245,7 +245,7 @@ def test_tool_call_with_constrain_tokenized_correctly(encoding_name): """ encoding = load_harmony_encoding(encoding_name) text = ( - "<|start|>assistant to=functions.get_weather<|channel|>commentary" + "<|start|>assistant<|channel|>commentary to=functions.get_weather" ' <|constrain|>json<|message|>{"location": "Tokyo"}<|call|>' ) tokens = encoding.encode(text, allowed_special="all")