Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,18 @@ impl StreamableParser {
Ok(self)
}

/// Reports whether `header_part` begins with the encoding's constrained-format marker.
///
/// The marker is looked up through the encoding's mapping for
/// `FormattingToken::ConstrainedFormat`. When the encoding has no mapping for
/// that token, no header part can be a constrained content type and the
/// result is `false`.
fn is_constrained_content_type(&self, header_part: &str) -> bool {
    self.encoding
        .mapped_format_token(FormattingToken::ConstrainedFormat)
        .is_some_and(|marker| header_part.starts_with(marker))
}

/// Helper to parse header metadata from a decoded string.
/// Returns the parsed header and any remaining content after extracting header parts.
///
Expand Down Expand Up @@ -1420,12 +1432,11 @@ impl StreamableParser {
if let Some(stripped) = last_part.strip_prefix("to=") {
// The header contains a recipient but *no* content-type.
recipient = Some(stripped.to_string());
} else if num_parts == 1 {
// Only one part total (after potential role removal) and it doesn't start
// with "to=" => interpret it as a standalone recipient.
} else if num_parts == 1 && !self.is_constrained_content_type(last_part) {
// A single unconstrained part is a standalone recipient.
recipient = Some(last_part.to_string());
} else {
// More than one token and the last one is not a recipient -> treat as content-type.
// The last part is a content type, which may appear without a recipient.
content_type = Some(last_part.to_string());

// After removing the content-type there may be exactly one token describing the recipient.
Expand Down
20 changes: 20 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,26 @@ fn test_streamable_parser_tool_call_with_constrain_adjacent() {
);
}

/// A header that carries a constrained content type but no recipient must be
/// parsed as a content type, not mistaken for a standalone recipient.
#[test]
fn test_streamable_parser_constrained_output_without_recipient() {
    // The message we expect the parser to reconstruct from the token stream.
    let want = Message::from_role_and_content(Role::Assistant, "{\"result\":true}")
        .with_channel("final")
        .with_content_type("<|constrain|>json");

    let enc = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
    let input = concat!(
        "<|start|>assistant<|channel|>final ",
        "<|constrain|>json<|message|>{\"result\":true}<|return|>"
    );
    // Tokenize before the encoding is handed over to the parser.
    let token_stream = enc.tokenizer().encode_with_special_tokens(input);

    let mut streaming = StreamableParser::new(enc, None).unwrap();
    token_stream.into_iter().for_each(|tok| {
        streaming.process(tok).unwrap();
    });

    assert_eq!(streaming.messages(), &[want]);
}

#[test]
fn test_missing_message_token_requires_non_strict_mode() {
let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
Expand Down
20 changes: 20 additions & 0 deletions tests/test_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,26 @@ def test_streamable_parser_tool_call_with_constrain_adjacent():
assert parser.messages == expected


def test_streamable_parser_constrained_output_without_recipient():
    """A constrained content type with no recipient is parsed as a content type."""
    # The message the parser should reconstruct from the token stream.
    want = Message.from_role_and_content(Role.ASSISTANT, '{"result":true}')
    want = want.with_channel("final")
    want = want.with_content_type("<|constrain|>json")

    enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    raw = (
        "<|start|>assistant<|channel|>final "
        '<|constrain|>json<|message|>{"result":true}<|return|>'
    )
    token_ids = encoding_tokens = enc.encode(raw, allowed_special="all")

    streaming = StreamableParser(enc, None)
    for tok in token_ids:
        streaming.process(tok)

    assert streaming.messages == [want]


@pytest.mark.parametrize("strict, expect_error", [(False, False), (True, True)])
def test_streamable_parser_missing_message_token(strict: bool, expect_error: bool):
encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
Expand Down