From 3d181d7a5ef96b6d56ac2e7e8d4fed86f7b14713 Mon Sep 17 00:00:00 2001 From: shaojunjie <626650687@qq.com> Date: Sat, 13 Jun 2026 20:53:05 +0800 Subject: [PATCH] fix: parse constrained content type without recipient --- src/encoding.rs | 19 +++++++++++++++---- src/tests.rs | 20 ++++++++++++++++++++ tests/test_harmony.py | 20 ++++++++++++++++++++ 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index 60257e7..2c20e19 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -1323,6 +1323,18 @@ impl StreamableParser { Ok(self) } + fn is_constrained_content_type(&self, header_part: &str) -> bool { + let constrained_format_marker = self + .encoding + .mapped_format_token(FormattingToken::ConstrainedFormat); + + if let Some(marker) = constrained_format_marker { + header_part.starts_with(marker) + } else { + false + } + } + /// Helper to parse header metadata from a decoded string. /// Returns the parsed header and any remaining content after extracting header parts. /// @@ -1420,12 +1432,11 @@ impl StreamableParser { if let Some(stripped) = last_part.strip_prefix("to=") { // The header contains a recipient but *no* content-type. recipient = Some(stripped.to_string()); - } else if num_parts == 1 { - // Only one part total (after potential role removal) and it doesn't start - // with "to=" => interpret it as a standalone recipient. + } else if num_parts == 1 && !self.is_constrained_content_type(last_part) { + // A single unconstrained part is a standalone recipient. recipient = Some(last_part.to_string()); } else { - // More than one token and the last one is not a recipient -> treat as content-type. + // The last part is a content type, which may appear without a recipient. content_type = Some(last_part.to_string()); // After removing the content-type there may be exactly one token describing the recipient. diff --git a/src/tests.rs b/src/tests.rs index 7aba934..fa318f8 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -674,6 +674,26 @@ fn test_streamable_parser_tool_call_with_constrain_adjacent() { ); } +#[test] +fn test_streamable_parser_constrained_output_without_recipient() { + let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap(); + let text = concat!( + "<|start|>assistant<|channel|>final ", + "<|constrain|>json<|message|>{\"result\":true}<|return|>" + ); + let tokens = encoding.tokenizer().encode_with_special_tokens(text); + let expected = Message::from_role_and_content(Role::Assistant, "{\"result\":true}") + .with_channel("final") + .with_content_type("<|constrain|>json"); + let mut parser = StreamableParser::new(encoding, None).unwrap(); + + for token in tokens { + parser.process(token).unwrap(); + } + + assert_eq!(parser.messages(), &[expected]); +} + #[test] fn test_missing_message_token_requires_non_strict_mode() { let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap(); diff --git a/tests/test_harmony.py b/tests/test_harmony.py index dbb9925..ade7086 100644 --- a/tests/test_harmony.py +++ b/tests/test_harmony.py @@ -983,6 +983,26 @@ def test_streamable_parser_tool_call_with_constrain_adjacent(): assert parser.messages == expected +def test_streamable_parser_constrained_output_without_recipient(): + encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) + text = ( + "<|start|>assistant<|channel|>final " + '<|constrain|>json<|message|>{"result":true}<|return|>' + ) + tokens = encoding.encode(text, allowed_special="all") + expected = ( + Message.from_role_and_content(Role.ASSISTANT, '{"result":true}') + .with_channel("final") + .with_content_type("<|constrain|>json") + ) + + parser = StreamableParser(encoding, None) + for token in tokens: + parser.process(token) + + assert parser.messages == [expected] + + @pytest.mark.parametrize("strict, expect_error", [(False, False), (True, True)]) def test_streamable_parser_missing_message_token(strict: bool, expect_error: bool): encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)