From af0b09000a986d7459b2337fc260ed9cb3559111 Mon Sep 17 00:00:00 2001 From: "Nikhil Chitlur Navakiran (from Dev Box)" Date: Fri, 22 May 2026 10:51:00 -0600 Subject: [PATCH 1/4] Align message serialization to OTel spec: remove version wrapper Remove the version/messages wrapper envelope from serialized message payloads. Messages now serialize as a plain JSON array per OTel gen-ai semantic conventions, matching the .NET SDK (Agent365-dotnet#253). Changes: - Remove A365_MESSAGE_SCHEMA_VERSION constant and version field from InputMessages/OutputMessages dataclasses - Serialize as plain JSON array instead of {version, messages} object - Make OutputMessage.finish_reason default to 'stop' (required per spec) - Update fallback serialization to also use array format - Add TestSerializationFormat test class ensuring all paths produce arrays - Update all unit and integration tests to expect array format Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../observability/core/message_utils.py | 50 ++--- .../observability/core/models/messages.py | 24 +- .../observability/core/test_message_utils.py | 206 +++++++++++++++--- .../core/test_output_scope_bounded.py | 9 +- .../observability/core/test_scope_messages.py | 71 +++--- .../integration/test_message_format.py | 32 ++- .../test_observability_pipeline.py | 12 +- .../agentframework/test_span_enricher.py | 16 +- .../integration/test_message_format.py | 40 ++-- .../test_observability_pipeline.py | 31 +-- .../openai/integration/test_message_format.py | 26 +-- .../extensions/openai/test_message_mapper.py | 96 ++++---- 12 files changed, 362 insertions(+), 251 deletions(-) diff --git a/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py b/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py index cfc03ae0..16d25dd7 100644 --- a/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py +++ b/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py @@ -4,7 +4,7 @@ """Conversion and serialization helpers for OTEL gen-ai message format. Provides normalization from plain ``list[str]`` (backward compat) to the -versioned wrapper format, and a non-throwing ``serialize_messages`` function. +structured array format, and a non-throwing ``serialize_messages`` function. """ from __future__ import annotations @@ -16,7 +16,6 @@ from typing import Union from .models.messages import ( - A365_MESSAGE_SCHEMA_VERSION, ChatMessage, InputMessages, InputMessagesParam, @@ -40,7 +39,7 @@ def is_string_list( def is_wrapped_messages( param: Union[InputMessagesParam, OutputMessagesParam], ) -> bool: - """Return ``True`` when *param* is a versioned wrapper.""" + """Return ``True`` when *param* is a structured message container.""" return isinstance(param, (InputMessages, OutputMessages)) @@ -71,7 +70,7 @@ def to_output_messages(messages: list[str]) -> list[OutputMessage]: def normalize_input_messages(param: InputMessagesParam) -> InputMessages: - """Normalize an ``InputMessagesParam`` to a versioned ``InputMessages`` wrapper. + """Normalize an ``InputMessagesParam`` to an ``InputMessages`` instance. - ``str`` → wrapped in a single-element list, then converted. - ``list[str]`` → converted to ``ChatMessage`` list and wrapped. @@ -85,7 +84,7 @@ def normalize_input_messages(param: InputMessagesParam) -> InputMessages: def normalize_output_messages(param: OutputMessagesParam) -> OutputMessages: - """Normalize an ``OutputMessagesParam`` to a versioned ``OutputMessages`` wrapper. + """Normalize an ``OutputMessagesParam`` to an ``OutputMessages`` instance. - ``str`` → wrapped in a single-element list, then converted. - ``list[str]`` → converted to ``OutputMessage`` list and wrapped. @@ -114,37 +113,34 @@ def _message_dict_factory(items: list[tuple[str, object]]) -> dict[str, object]: def serialize_messages( wrapper: Union[InputMessages, OutputMessages], ) -> str: - """Serialize a versioned message wrapper to JSON. + """Serialize a message container to a JSON array. - The output is the full wrapper object: - ``{"version":"0.1.0","messages":[...]}``. + The output is a plain JSON array of message objects per OTel gen-ai + semantic conventions: ``[{"role":"user","parts":[...]}]``. The try/except ensures telemetry recording is non-throwing even when message parts contain non-JSON-serializable values. """ try: - return json.dumps( - asdict(wrapper, dict_factory=_message_dict_factory), - default=str, - ensure_ascii=False, - ) + messages_dicts = [ + asdict(msg, dict_factory=_message_dict_factory) for msg in wrapper.messages + ] + return json.dumps(messages_dicts, default=str, ensure_ascii=False) except Exception: logger.warning("Failed to serialize messages; using fallback.", exc_info=True) messages = getattr(wrapper, "messages", []) count = len(messages) if isinstance(messages, list) else 0 noun = "message" if count == 1 else "messages" - fallback = { - "version": A365_MESSAGE_SCHEMA_VERSION, - "messages": [ - { - "role": MessageRole.SYSTEM.value, - "parts": [ - { - "type": "text", - "content": f"[serialization failed: {count} {noun}]", - } - ], - } - ], - } + fallback = [ + { + "role": MessageRole.SYSTEM.value, + "parts": [ + { + "type": "text", + "content": f"[serialization failed: {count} {noun}]", + } + ], + "finish_reason": "error", + } + ] return json.dumps(fallback, ensure_ascii=False) diff --git a/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/models/messages.py b/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/models/messages.py index 7302988a..9379b911 100644 --- a/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/models/messages.py +++ b/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/models/messages.py @@ -175,33 +175,37 @@ class ChatMessage: @dataclass class OutputMessage(ChatMessage): - """An output message produced by a model (OTEL gen-ai semantic conventions).""" + """An output message produced by a model (OTEL gen-ai semantic conventions). - finish_reason: str | None = None + ``finish_reason`` defaults to ``"stop"`` per OTel spec (required field). + """ + + finish_reason: str = "stop" # --------------------------------------------------------------------------- -# Versioned wrappers +# Message containers # --------------------------------------------------------------------------- -A365_MESSAGE_SCHEMA_VERSION: str = "0.1.0" -"""Schema version embedded in serialized message payloads.""" - @dataclass class InputMessages: - """Versioned wrapper for input messages.""" + """Represents the list of input messages sent to the model. + + Serializes as a plain JSON array per OTel gen-ai semantic conventions. + """ messages: list[ChatMessage] = field(default_factory=list) - version: str = field(default=A365_MESSAGE_SCHEMA_VERSION, init=False) @dataclass class OutputMessages: - """Versioned wrapper for output messages.""" + """Represents the list of output messages generated by the model. + + Serializes as a plain JSON array per OTel gen-ai semantic conventions. + """ messages: list[OutputMessage] = field(default_factory=list) - version: str = field(default=A365_MESSAGE_SCHEMA_VERSION, init=False) # --------------------------------------------------------------------------- diff --git a/tests/observability/core/test_message_utils.py b/tests/observability/core/test_message_utils.py index e4c75f46..4cb2b2bd 100644 --- a/tests/observability/core/test_message_utils.py +++ b/tests/observability/core/test_message_utils.py @@ -19,7 +19,6 @@ to_output_messages, ) from microsoft_agents_a365.observability.core.models.messages import ( - A365_MESSAGE_SCHEMA_VERSION, BlobPart, ChatMessage, FinishReason, @@ -110,7 +109,6 @@ class TestNormalization(unittest.TestCase): def test_normalize_input_from_strings(self): result = normalize_input_messages(["Hello"]) self.assertIsInstance(result, InputMessages) - self.assertEqual(result.version, A365_MESSAGE_SCHEMA_VERSION) self.assertEqual(len(result.messages), 1) self.assertEqual(result.messages[0].role, MessageRole.USER) @@ -126,7 +124,6 @@ def test_normalize_input_from_wrapper(self): def test_normalize_output_from_strings(self): result = normalize_output_messages(["Response"]) self.assertIsInstance(result, OutputMessages) - self.assertEqual(result.version, A365_MESSAGE_SCHEMA_VERSION) self.assertEqual(len(result.messages), 1) self.assertEqual(result.messages[0].role, MessageRole.ASSISTANT) @@ -164,11 +161,11 @@ def test_serialize_input_messages(self): result = serialize_messages(wrapper) parsed = json.loads(result) - self.assertEqual(parsed["version"], A365_MESSAGE_SCHEMA_VERSION) - self.assertEqual(len(parsed["messages"]), 1) - self.assertEqual(parsed["messages"][0]["role"], "user") - self.assertEqual(parsed["messages"][0]["parts"][0]["type"], "text") - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Hello") + self.assertIsInstance(parsed, list) + self.assertEqual(len(parsed), 1) + self.assertEqual(parsed[0]["role"], "user") + self.assertEqual(parsed[0]["parts"][0]["type"], "text") + self.assertEqual(parsed[0]["parts"][0]["content"], "Hello") def test_serialize_output_messages(self): wrapper = OutputMessages( @@ -183,9 +180,9 @@ def test_serialize_output_messages(self): result = serialize_messages(wrapper) parsed = json.loads(result) - self.assertEqual(parsed["version"], A365_MESSAGE_SCHEMA_VERSION) - self.assertEqual(parsed["messages"][0]["role"], "assistant") - self.assertEqual(parsed["messages"][0]["finish_reason"], "stop") + self.assertIsInstance(parsed, list) + self.assertEqual(parsed[0]["role"], "assistant") + self.assertEqual(parsed[0]["finish_reason"], "stop") def test_serialize_omits_none_values(self): wrapper = InputMessages( @@ -195,7 +192,7 @@ def test_serialize_omits_none_values(self): parsed = json.loads(result) # name is None so should not appear - self.assertNotIn("name", parsed["messages"][0]) + self.assertNotIn("name", parsed[0]) def test_serialize_complex_parts(self): wrapper = InputMessages( @@ -212,7 +209,7 @@ def test_serialize_complex_parts(self): result = serialize_messages(wrapper) parsed = json.loads(result) - parts = parsed["messages"][0]["parts"] + parts = parsed[0]["parts"] self.assertEqual(len(parts), 2) self.assertEqual(parts[0]["type"], "text") self.assertEqual(parts[1]["type"], "blob") @@ -237,7 +234,7 @@ def test_serialize_with_tool_call_part(self): result = serialize_messages(wrapper) parsed = json.loads(result) - part = parsed["messages"][0]["parts"][0] + part = parsed[0]["parts"][0] self.assertEqual(part["type"], "tool_call") self.assertEqual(part["name"], "search") self.assertEqual(part["id"], "call_123") @@ -259,7 +256,7 @@ def test_serialize_with_reasoning_part(self): result = serialize_messages(wrapper) parsed = json.loads(result) - parts = parsed["messages"][0]["parts"] + parts = parsed[0]["parts"] self.assertEqual(parts[0]["type"], "reasoning") self.assertEqual(parts[0]["content"], "Checking GDPR Article 5") self.assertEqual(parts[1]["type"], "text") @@ -281,30 +278,173 @@ def test_serialize_empty_messages(self): wrapper = InputMessages(messages=[]) result = serialize_messages(wrapper) parsed = json.loads(result) - self.assertEqual(parsed["version"], A365_MESSAGE_SCHEMA_VERSION) - self.assertEqual(parsed["messages"], []) + self.assertIsInstance(parsed, list) + self.assertEqual(parsed, []) -class TestVersionField(unittest.TestCase): - """Tests for the version field on wrappers.""" +class TestOutputMessageDefaults(unittest.TestCase): + """Tests for OutputMessage default finish_reason.""" - def test_input_messages_version_is_constant(self): - wrapper = InputMessages(messages=[]) - self.assertEqual(wrapper.version, A365_MESSAGE_SCHEMA_VERSION) + def test_output_message_defaults_to_stop(self): + msg = OutputMessage(role=MessageRole.ASSISTANT, parts=[TextPart(content="Hi")]) + self.assertEqual(msg.finish_reason, "stop") - def test_output_messages_version_is_constant(self): - wrapper = OutputMessages(messages=[]) - self.assertEqual(wrapper.version, A365_MESSAGE_SCHEMA_VERSION) + def test_output_message_custom_finish_reason(self): + msg = OutputMessage( + role=MessageRole.ASSISTANT, + parts=[TextPart(content="Hi")], + finish_reason=FinishReason.TOOL_CALL.value, + ) + self.assertEqual(msg.finish_reason, "tool_call") + + def test_serialized_output_includes_finish_reason(self): + """finish_reason is always present in serialized output (required per OTel spec).""" + wrapper = OutputMessages( + messages=[ + OutputMessage(role=MessageRole.ASSISTANT, parts=[TextPart(content="Hi")]) + ] + ) + result = serialize_messages(wrapper) + parsed = json.loads(result) + self.assertEqual(parsed[0]["finish_reason"], "stop") - def test_version_not_settable_via_constructor(self): - """Version field uses init=False so it cannot be passed as a constructor arg.""" - with self.assertRaises(TypeError): - InputMessages(messages=[], version="99.99.99") # type: ignore[call-arg] - def test_version_embedded_in_serialized_output(self): - wrapper = InputMessages(messages=[]) - result = json.loads(serialize_messages(wrapper)) - self.assertEqual(result["version"], A365_MESSAGE_SCHEMA_VERSION) +class TestSerializationFormat(unittest.TestCase): + """Ensure ALL serialization paths produce a plain JSON array (no wrapper object). + + Per OTel gen-ai semantic conventions, the serialized form must always be: + [{"role": "...", "parts": [...]}] + Never the old {"version": "...", "messages": [...]} format. + """ + + def _assert_plain_array(self, result: str) -> list: + """Assert result is a JSON array of message dicts with role+parts.""" + parsed = json.loads(result) + self.assertIsInstance(parsed, list, "Serialized output must be a JSON array") + self.assertNotIsInstance(parsed, dict, "Must not be a wrapper object") + for msg in parsed: + self.assertIsInstance(msg, dict) + self.assertIn("role", msg, "Each message must have 'role'") + self.assertIn("parts", msg, "Each message must have 'parts'") + self.assertIsInstance(msg["parts"], list) + self.assertNotIn("version", msg, "No 'version' field in messages") + # Top-level must never contain 'version' or 'messages' keys (old wrapper) + raw_text = result.strip() + self.assertTrue(raw_text.startswith("["), "Must start with '['") + return parsed + + def test_input_from_string(self): + """Single string normalized and serialized.""" + wrapper = normalize_input_messages("Hello") + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(msgs[0]["role"], "user") + self.assertEqual(msgs[0]["parts"][0]["content"], "Hello") + + def test_input_from_string_list(self): + """String list normalized and serialized.""" + wrapper = normalize_input_messages(["First", "Second"]) + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(len(msgs), 2) + self.assertEqual(msgs[0]["parts"][0]["content"], "First") + self.assertEqual(msgs[1]["parts"][0]["content"], "Second") + + def test_input_from_structured(self): + """Pre-built InputMessages serialized.""" + wrapper = InputMessages( + messages=[ + ChatMessage(role=MessageRole.SYSTEM, parts=[TextPart(content="System")]), + ChatMessage(role=MessageRole.USER, parts=[TextPart(content="User")]), + ] + ) + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(msgs[0]["role"], "system") + self.assertEqual(msgs[1]["role"], "user") + + def test_output_from_string(self): + """Single string normalized to output and serialized.""" + wrapper = normalize_output_messages("Response") + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(msgs[0]["role"], "assistant") + self.assertEqual(msgs[0]["parts"][0]["content"], "Response") + self.assertIn("finish_reason", msgs[0]) + + def test_output_from_string_list(self): + """String list normalized to output and serialized.""" + wrapper = normalize_output_messages(["A", "B"]) + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(len(msgs), 2) + for msg in msgs: + self.assertEqual(msg["role"], "assistant") + self.assertIn("finish_reason", msg) + + def test_output_from_structured(self): + """Pre-built OutputMessages serialized.""" + wrapper = OutputMessages( + messages=[ + OutputMessage( + role=MessageRole.ASSISTANT, + parts=[TextPart(content="Done")], + finish_reason=FinishReason.STOP.value, + ) + ] + ) + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(msgs[0]["finish_reason"], "stop") + + def test_empty_input_messages(self): + """Empty InputMessages serializes to empty array.""" + result = serialize_messages(InputMessages(messages=[])) + parsed = json.loads(result) + self.assertIsInstance(parsed, list) + self.assertEqual(parsed, []) + + def test_empty_output_messages(self): + """Empty OutputMessages serializes to empty array.""" + result = serialize_messages(OutputMessages(messages=[])) + parsed = json.loads(result) + self.assertIsInstance(parsed, list) + self.assertEqual(parsed, []) + + def test_multi_part_message(self): + """Message with multiple parts serializes as plain array.""" + wrapper = InputMessages( + messages=[ + ChatMessage( + role=MessageRole.USER, + parts=[ + TextPart(content="Look at this"), + BlobPart(modality="image", content="base64==", mime_type="image/png"), + ], + ) + ] + ) + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(len(msgs[0]["parts"]), 2) + + def test_tool_call_message(self): + """Tool call parts serialize as plain array.""" + wrapper = OutputMessages( + messages=[ + OutputMessage( + role=MessageRole.ASSISTANT, + parts=[ + ToolCallRequestPart(name="search", id="c1", arguments={"q": "test"}) + ], + finish_reason=FinishReason.TOOL_CALL.value, + ) + ] + ) + result = serialize_messages(wrapper) + msgs = self._assert_plain_array(result) + self.assertEqual(msgs[0]["parts"][0]["type"], "tool_call") + self.assertEqual(msgs[0]["finish_reason"], "tool_call") if __name__ == "__main__": diff --git a/tests/observability/core/test_output_scope_bounded.py b/tests/observability/core/test_output_scope_bounded.py index dcc292ea..7ed690dc 100644 --- a/tests/observability/core/test_output_scope_bounded.py +++ b/tests/observability/core/test_output_scope_bounded.py @@ -9,7 +9,6 @@ from microsoft_agents_a365.observability.core.constants import GEN_AI_OUTPUT_MESSAGES_KEY from microsoft_agents_a365.observability.core.models.messages import ( - A365_MESSAGE_SCHEMA_VERSION, MessageRole, OutputMessage, OutputMessages, @@ -38,8 +37,8 @@ def test_record_overwrites_with_strings(self): call_args = scope.set_tag_maybe.call_args self.assertEqual(call_args[0][0], GEN_AI_OUTPUT_MESSAGES_KEY) parsed = json.loads(call_args[0][1]) - self.assertEqual(parsed["version"], A365_MESSAGE_SCHEMA_VERSION) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Final response") + self.assertIsInstance(parsed, list) + self.assertEqual(parsed[0]["parts"][0]["content"], "Final response") def test_record_overwrites_with_structured(self): """Calling record_output_messages with OutputMessages sets the attribute.""" @@ -56,7 +55,7 @@ def test_record_overwrites_with_structured(self): call_args = scope.set_tag_maybe.call_args parsed = json.loads(call_args[0][1]) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Structured") + self.assertEqual(parsed[0]["parts"][0]["content"], "Structured") def test_record_overwrites_with_dict(self): """Calling record_output_messages with dict sets JSON directly.""" @@ -77,7 +76,7 @@ def test_second_call_replaces_first(self): call_args = scope.set_tag_maybe.call_args parsed = json.loads(call_args[0][1]) self.assertNotIn("First", call_args[0][1]) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Second") + self.assertEqual(parsed[0]["parts"][0]["content"], "Second") if __name__ == "__main__": diff --git a/tests/observability/core/test_scope_messages.py b/tests/observability/core/test_scope_messages.py index eb75cb90..043610c6 100644 --- a/tests/observability/core/test_scope_messages.py +++ b/tests/observability/core/test_scope_messages.py @@ -28,7 +28,6 @@ GEN_AI_OUTPUT_MESSAGES_KEY, ) from microsoft_agents_a365.observability.core.models.messages import ( - A365_MESSAGE_SCHEMA_VERSION, ChatMessage, FinishReason, InputMessages, @@ -86,9 +85,9 @@ def _get_last_span_attrs(self) -> dict: self.assertTrue(spans, "Expected at least one span") return dict(getattr(spans[-1], "attributes", {}) or {}) - def _parse_messages(self, attr_value: str) -> dict: + def _parse_messages(self, attr_value: str) -> list: parsed = json.loads(attr_value) - self.assertEqual(parsed["version"], A365_MESSAGE_SCHEMA_VERSION) + self.assertIsInstance(parsed, list) return parsed @@ -96,16 +95,16 @@ class TestInvokeAgentScopeMessages(ScopeMessageTestBase): """Tests for InvokeAgentScope message recording.""" def test_record_input_messages_with_strings(self): - """Plain string list should be auto-wrapped into versioned format.""" + """Plain string list should be auto-wrapped into structured format.""" scope = InvokeAgentScope.start(Request(), self.invoke_scope_details, self.agent_details) scope.record_input_messages(["What is GDPR?"]) scope.dispose() attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 1) - self.assertEqual(parsed["messages"][0]["role"], "user") - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "What is GDPR?") + self.assertEqual(len(parsed), 1) + self.assertEqual(parsed[0]["role"], "user") + self.assertEqual(parsed[0]["parts"][0]["content"], "What is GDPR?") def test_record_input_messages_with_structured(self): """Versioned InputMessages wrapper should be serialized as-is.""" @@ -127,9 +126,9 @@ def test_record_input_messages_with_structured(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 2) - self.assertEqual(parsed["messages"][0]["role"], "system") - self.assertEqual(parsed["messages"][1]["role"], "user") + self.assertEqual(len(parsed), 2) + self.assertEqual(parsed[0]["role"], "system") + self.assertEqual(parsed[1]["role"], "user") def test_record_output_messages_with_strings(self): scope = InvokeAgentScope.start(Request(), self.invoke_scope_details, self.agent_details) @@ -138,9 +137,9 @@ def test_record_output_messages_with_strings(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["role"], "assistant") + self.assertEqual(parsed[0]["role"], "assistant") self.assertEqual( - parsed["messages"][0]["parts"][0]["content"], + parsed[0]["parts"][0]["content"], "GDPR requires data minimization.", ) @@ -163,21 +162,21 @@ def test_record_output_messages_with_structured(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - msg = parsed["messages"][0] + msg = parsed[0] self.assertEqual(msg["finish_reason"], "stop") self.assertEqual(len(msg["parts"]), 2) self.assertEqual(msg["parts"][0]["type"], "reasoning") self.assertEqual(msg["parts"][1]["type"], "text") def test_record_response_wraps_string(self): - """record_response(str) should produce versioned output messages.""" + """record_response(str) should produce structured output messages.""" scope = InvokeAgentScope.start(Request(), self.invoke_scope_details, self.agent_details) scope.record_response("Simple response") scope.dispose() attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Simple response") + self.assertEqual(parsed[0]["parts"][0]["content"], "Simple response") def test_request_content_string_auto_wrapped(self): """Request.content as plain string should be wrapped into versioned format.""" @@ -187,8 +186,8 @@ def test_request_content_string_auto_wrapped(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["role"], "user") - self.assertIn("What is GDPR?", parsed["messages"][0]["parts"][0]["content"]) + self.assertEqual(parsed[0]["role"], "user") + self.assertIn("What is GDPR?", parsed[0]["parts"][0]["content"]) def test_request_content_structured_input(self): """Request.content as InputMessages should be serialized directly.""" @@ -201,7 +200,7 @@ def test_request_content_structured_input(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Hello") + self.assertEqual(parsed[0]["parts"][0]["content"], "Hello") class TestInferenceScopeMessages(ScopeMessageTestBase): @@ -214,8 +213,8 @@ def test_record_input_messages_with_strings(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["role"], "user") - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Explain quantum computing") + self.assertEqual(parsed[0]["role"], "user") + self.assertEqual(parsed[0]["parts"][0]["content"], "Explain quantum computing") def test_record_input_messages_with_structured(self): wrapper = InputMessages( @@ -236,7 +235,7 @@ def test_record_input_messages_with_structured(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 2) + self.assertEqual(len(parsed), 2) def test_record_output_messages_with_strings(self): scope = InferenceScope.start(Request(), self.inference_details, self.agent_details) @@ -245,7 +244,7 @@ def test_record_output_messages_with_strings(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["role"], "assistant") + self.assertEqual(parsed[0]["role"], "assistant") def test_record_output_messages_with_structured(self): wrapper = OutputMessages( @@ -263,7 +262,7 @@ def test_record_output_messages_with_structured(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["finish_reason"], "stop") + self.assertEqual(parsed[0]["finish_reason"], "stop") def test_request_content_string_auto_wrapped(self): request = Request(content="Test content") @@ -272,24 +271,24 @@ def test_request_content_string_auto_wrapped(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Test content") + self.assertEqual(parsed[0]["parts"][0]["content"], "Test content") class TestOutputScopeMessages(ScopeMessageTestBase): """Tests for OutputScope structured message support.""" def test_initial_string_messages_wrapped(self): - """Response with plain strings should produce versioned output.""" + """Response with plain strings should produce structured output.""" response = Response(messages=["First", "Second"]) with OutputScope.start(Request(), response, self.agent_details): pass attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 2) - self.assertEqual(parsed["messages"][0]["role"], "assistant") - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "First") - self.assertEqual(parsed["messages"][1]["parts"][0]["content"], "Second") + self.assertEqual(len(parsed), 2) + self.assertEqual(parsed[0]["role"], "assistant") + self.assertEqual(parsed[0]["parts"][0]["content"], "First") + self.assertEqual(parsed[1]["parts"][0]["content"], "Second") def test_initial_structured_messages(self): """Response with OutputMessages should be serialized directly.""" @@ -308,7 +307,7 @@ def test_initial_structured_messages(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["finish_reason"], "stop") + self.assertEqual(parsed[0]["finish_reason"], "stop") def test_record_overwrites_string_messages(self): """record_output_messages with strings overwrites previous messages.""" @@ -318,9 +317,9 @@ def test_record_overwrites_string_messages(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 1) + self.assertEqual(len(parsed), 1) self.assertNotIn("Initial", attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Replacement") + self.assertEqual(parsed[0]["parts"][0]["content"], "Replacement") def test_record_overwrites_with_structured(self): """record_output_messages with OutputMessages overwrites previous messages.""" @@ -339,8 +338,8 @@ def test_record_overwrites_with_structured(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 1) - self.assertEqual(parsed["messages"][0]["finish_reason"], "stop") + self.assertEqual(len(parsed), 1) + self.assertEqual(parsed[0]["finish_reason"], "stop") def test_record_overwrites_with_dict(self): """record_output_messages with dict sets tool result directly.""" @@ -360,8 +359,8 @@ def test_no_record_keeps_initial(self): attrs = self._get_last_span_attrs() parsed = self._parse_messages(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(len(parsed["messages"]), 1) - self.assertEqual(parsed["messages"][0]["parts"][0]["content"], "Only initial") + self.assertEqual(len(parsed), 1) + self.assertEqual(parsed[0]["parts"][0]["content"], "Only initial") if __name__ == "__main__": diff --git a/tests/observability/extensions/agentframework/integration/test_message_format.py b/tests/observability/extensions/agentframework/integration/test_message_format.py index ff260a0f..9182fe48 100644 --- a/tests/observability/extensions/agentframework/integration/test_message_format.py +++ b/tests/observability/extensions/agentframework/integration/test_message_format.py @@ -81,7 +81,7 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: @pytest.mark.integration class TestAgentFrameworkMessageFormat: """Capture real AgentFramework span attributes after enrichment - and verify the A365 versioned message format.""" + and verify the A365 structured array message format.""" @pytest.fixture(autouse=True) def setup_observability(self) -> None: @@ -144,7 +144,7 @@ def _find_chat_spans(self) -> list[ReadableSpan]: @pytest.mark.asyncio async def test_simple_chat_message_mapping(self, chat_client: AzureOpenAIChatClient) -> None: - """Simple chat: verify exported spans contain versioned A365 messages + """Simple chat: verify exported spans contain structured A365 messages after enrichment (no manual mapper call).""" agent = RawAgent( client=chat_client, @@ -163,14 +163,10 @@ async def test_simple_chat_message_mapping(self, chat_client: AzureOpenAIChatCli attrs = dict(chat_spans[-1].attributes or {}) - # --- Input messages: enriched to versioned format --- + # --- Input messages: enriched to structured array format --- input_data = json.loads(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - # Enricher should have produced versioned wrapper for chat spans - if isinstance(input_data, dict): - assert input_data["version"] == "0.1.0" - messages = input_data["messages"] - else: - messages = input_data + assert isinstance(input_data, list) + messages = input_data roles = [m["role"] for m in messages] assert "system" in roles @@ -179,13 +175,10 @@ async def test_simple_chat_message_mapping(self, chat_client: AzureOpenAIChatCli for part in msg["parts"]: assert "type" in part - # --- Output messages: enriched to versioned format --- + # --- Output messages: enriched to structured array format --- output_data = json.loads(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - if isinstance(output_data, dict): - assert output_data["version"] == "0.1.0" - out_messages = output_data["messages"] - else: - out_messages = output_data + assert isinstance(output_data, list) + out_messages = output_data assert out_messages[0]["role"] == "assistant" assert any(p["type"] == "text" for p in out_messages[0]["parts"]) @@ -224,10 +217,11 @@ async def test_tool_call_message_mapping(self, chat_client: AzureOpenAIChatClien if not raw: continue data = json.loads(raw) - messages = data["messages"] if isinstance(data, dict) else data - for msg in messages: - for part in msg.get("parts", []): - part_types.add(part.get("type", "")) + if isinstance(data, list): + messages = data + for msg in messages: + for part in msg.get("parts", []): + part_types.add(part.get("type", "")) assert "tool_call" in part_types, f"Expected tool_call in exported parts: {part_types}" assert "tool_call_response" in part_types, ( diff --git a/tests/observability/extensions/agentframework/integration/test_observability_pipeline.py b/tests/observability/extensions/agentframework/integration/test_observability_pipeline.py index b5df805d..d4df94d8 100644 --- a/tests/observability/extensions/agentframework/integration/test_observability_pipeline.py +++ b/tests/observability/extensions/agentframework/integration/test_observability_pipeline.py @@ -188,7 +188,7 @@ async def test_pipeline_invoke_agent_with_tool_call( 2. invoke_agent span is the root (no parent) 3. Inference (chat) spans are descendants of invoke_agent 4. Tool execution spans are descendants of invoke_agent - 5. A365 message format on chat spans (versioned JSON) + 5. A365 message format on chat spans (structured array format) 6. Correct operation names and key attributes """ request = Request(content="What is 15 + 27?", session_id="test-session-pipeline") @@ -300,17 +300,15 @@ async def test_pipeline_invoke_agent_with_tool_call( attrs = dict(chat_span.attributes or {}) if GEN_AI_INPUT_MESSAGES_KEY in attrs: input_data = json.loads(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - if isinstance(input_data, dict) and "version" in input_data: - assert input_data["version"] == "0.1.0" - for msg in input_data["messages"]: + if isinstance(input_data, list): + for msg in input_data: assert "role" in msg assert "parts" in msg if GEN_AI_OUTPUT_MESSAGES_KEY in attrs: output_data = json.loads(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - if isinstance(output_data, dict) and "version" in output_data: - assert output_data["version"] == "0.1.0" - for msg in output_data["messages"]: + if isinstance(output_data, list): + for msg in output_data: assert "role" in msg assert "parts" in msg diff --git a/tests/observability/extensions/agentframework/test_span_enricher.py b/tests/observability/extensions/agentframework/test_span_enricher.py index 950359e7..5b189ce3 100644 --- a/tests/observability/extensions/agentframework/test_span_enricher.py +++ b/tests/observability/extensions/agentframework/test_span_enricher.py @@ -35,19 +35,19 @@ def test_invoke_agent_span_enrichment(self): span.name = "invoke_agent Agent365Assistant" result = enrich_agent_framework_span(span) - # Input should be versioned format with user message + # Input should be structured array format with user message input_json = json.loads(result.attributes[GEN_AI_INPUT_MESSAGES_KEY]) - self.assertEqual(input_json["version"], "0.1.0") - self.assertEqual(len(input_json["messages"]), 1) - self.assertEqual(input_json["messages"][0]["role"], "user") - self.assertEqual(input_json["messages"][0]["parts"][0]["content"], "Compute 15 % 4") + self.assertIsInstance(input_json, list) + self.assertEqual(len(input_json), 1) + self.assertEqual(input_json[0]["role"], "user") + self.assertEqual(input_json[0]["parts"][0]["content"], "Compute 15 % 4") - # Output should be versioned format: tool_call (no name -> filtered) + tool response + text + # Output should be structured array format: tool_call (no name -> filtered) + tool response + text output_json = json.loads(result.attributes[GEN_AI_OUTPUT_MESSAGES_KEY]) - self.assertEqual(output_json["version"], "0.1.0") + self.assertIsInstance(output_json, list) # tool_call with no name is filtered, tool_call_response with no id/response passes, # assistant text passes - assistant_msgs = [m for m in output_json["messages"] if m["role"] == "assistant"] + assistant_msgs = [m for m in output_json if m["role"] == "assistant"] self.assertTrue(len(assistant_msgs) >= 1) # At least one assistant message should have a text part text_parts = [p for m in assistant_msgs for p in m["parts"] if p.get("type") == "text"] diff --git a/tests/observability/extensions/langchain/integration/test_message_format.py b/tests/observability/extensions/langchain/integration/test_message_format.py index fee359eb..ae85762f 100644 --- a/tests/observability/extensions/langchain/integration/test_message_format.py +++ b/tests/observability/extensions/langchain/integration/test_message_format.py @@ -8,9 +8,8 @@ with a SpanCapturingExporter inside _EnrichingBatchSpanProcessor, then make real Azure OpenAI calls via LangChain and capture the span attributes. -Currently LangChain emits gen_ai.input.messages / gen_ai.output.messages -as plain JSON string arrays (e.g. '["Hello"]'). These tests document that -raw format and will verify the A365 versioned format once the mapper is added. +These tests verify the serialized gen_ai.input.messages / gen_ai.output.messages +structured array format emitted by the observability pipeline. """ import json @@ -145,20 +144,13 @@ async def test_simple_chat_message_mapping(self, llm: AzureChatOpenAI) -> None: print(f"\n=== gen_ai.input.messages ===\n{raw_input}") input_data = json.loads(raw_input) - # Verify structure (currently plain string list or versioned format) - if isinstance(input_data, dict) and "version" in input_data: - # Versioned A365 format (after mapper is added) - assert input_data["version"] == "0.1.0" - messages_list = input_data["messages"] - for msg in messages_list: - assert "role" in msg - assert "parts" in msg - print("\n ✓ Versioned A365 format detected") - elif isinstance(input_data, list): - # Current raw format: list of content strings - assert len(input_data) > 0 - assert any("capital" in s.lower() for s in input_data if isinstance(s, str)) - print("\n → Raw string list format (pre-mapper)") + # Verify structured array format + assert isinstance(input_data, list) + messages_list = input_data + for msg in messages_list: + assert "role" in msg + assert "parts" in msg + print("\n ✓ Structured array format detected") # --- Output messages --- raw_output = attrs.get(GEN_AI_OUTPUT_MESSAGES_KEY) @@ -166,15 +158,11 @@ async def test_simple_chat_message_mapping(self, llm: AzureChatOpenAI) -> None: print(f"\n=== gen_ai.output.messages ===\n{raw_output}") output_data = json.loads(raw_output) - if isinstance(output_data, dict) and "version" in output_data: - assert output_data["version"] == "0.1.0" - for msg in output_data["messages"]: - assert msg["role"] == "assistant" - assert any(p["type"] == "text" for p in msg["parts"]) - print("\n ✓ Versioned A365 format detected") - elif isinstance(output_data, list): - assert len(output_data) > 0 - print("\n → Raw string list format (pre-mapper)") + assert isinstance(output_data, list) + for msg in output_data: + assert msg["role"] == "assistant" + assert any(p["type"] == "text" for p in msg["parts"]) + print("\n ✓ Structured array format detected") @pytest.mark.asyncio async def test_tool_call_message_mapping(self, llm: AzureChatOpenAI) -> None: diff --git a/tests/observability/extensions/langchain/integration/test_observability_pipeline.py b/tests/observability/extensions/langchain/integration/test_observability_pipeline.py index 2bde8738..fd4551c1 100644 --- a/tests/observability/extensions/langchain/integration/test_observability_pipeline.py +++ b/tests/observability/extensions/langchain/integration/test_observability_pipeline.py @@ -7,20 +7,14 @@ InvokeAgentScope → Inference (auto-instrumented) → ToolExecution (auto-instrumented) The CustomLangChainInstrumentor automatically creates inference spans for LLM -calls and execute_tool spans for tool runs. Its built-in message mapper -converts LangChain messages into the versioned A365 message format -(``{"version": "0.1.0", "messages": [...]}``) on ``gen_ai.input.messages`` -and ``gen_ai.output.messages`` span attributes. +calls and execute_tool spans for tool runs. Its built-in message mapper +converts LangChain messages into the structured array A365 message format +(``[...]``) on ``gen_ai.input.messages`` and ``gen_ai.output.messages`` +span attributes. Wrapping the entire call in InvokeAgentScope makes all auto-instrumented spans children of the invoke_agent span (since ``separate_trace_from_runtime_context`` defaults to ``False``). - -Note: the message-format assertions accept both the versioned dict structure -*and* a raw JSON list. The raw-list branch exists for backward compatibility -with older instrumentation versions or third-party LangChain instrumentors that -emit ``gen_ai.*.messages`` as plain JSON arrays before the A365 mapper was -integrated. """ import json @@ -112,7 +106,7 @@ class TestLangChainObservabilityPipeline: Verifies that wrapping LangChain calls inside InvokeAgentScope produces a single trace with correct parent-child span hierarchy, - operation names, and A365 versioned message format attributes. + operation names, and A365 structured array message format attributes. """ @pytest.fixture(autouse=True) @@ -313,23 +307,22 @@ async def test_pipeline_invoke_agent_with_tool_call( print(f"\n✓ Found {len(tool_spans)} tool execution spans") # --- 8. A365 message format on inference spans --- - # The A365 mapper emits the versioned format {"version": "0.1.0", "messages": [...]}. - # Older or third-party instrumentors may emit a raw JSON list instead; - # the raw-list branch is kept for backward compatibility. + # The A365 mapper emits the structured array format [...]. for inf_span in inference_spans: attrs = dict(inf_span.attributes or {}) if GEN_AI_INPUT_MESSAGES_KEY in attrs: input_data = json.loads(attrs[GEN_AI_INPUT_MESSAGES_KEY]) - if isinstance(input_data, dict) and "version" in input_data: - assert input_data["version"] == "0.1.0" - for msg in input_data["messages"]: + if isinstance(input_data, list): + for msg in input_data: assert "role" in msg assert "parts" in msg if GEN_AI_OUTPUT_MESSAGES_KEY in attrs: output_data = json.loads(attrs[GEN_AI_OUTPUT_MESSAGES_KEY]) - if isinstance(output_data, dict) and "version" in output_data: - assert output_data["version"] == "0.1.0" + if isinstance(output_data, list): + for msg in output_data: + assert "role" in msg + assert "parts" in msg print( f"\n✓ All pipeline assertions passed: " diff --git a/tests/observability/extensions/openai/integration/test_message_format.py b/tests/observability/extensions/openai/integration/test_message_format.py index 705e735c..8c573e88 100644 --- a/tests/observability/extensions/openai/integration/test_message_format.py +++ b/tests/observability/extensions/openai/integration/test_message_format.py @@ -7,7 +7,7 @@ configure() → get_tracer_provider() → OpenAIAgentsTraceInstrumentor with real Azure OpenAI API calls. The message mapping is applied directly in trace_processor before spans are ended, converting raw OpenAI messages -to the A365 versioned format (v0.1.0) with typed parts. +to the A365 structured array format with typed parts. """ import json @@ -125,7 +125,7 @@ def _span_to_json(span: ReadableSpan) -> dict[str, object]: @pytest.mark.integration class TestOpenAIMessageFormat: """Capture real OpenAI Agents SDK span attributes after enrichment - and verify the A365 versioned message format.""" + and verify the A365 structured array message format.""" @pytest.fixture(autouse=True) def setup_observability(self) -> None: @@ -182,7 +182,7 @@ async def test_simple_chat_message_mapping( openai_client: AsyncAzureOpenAI, azure_openai_config: dict[str, Any], ) -> None: - """Simple chat: verify exported spans contain versioned A365 messages.""" + """Simple chat: verify exported spans contain structured A365 messages.""" agent = Agent( name="TestAgent", instructions="You are a helpful assistant. Reply in one sentence.", @@ -208,16 +208,16 @@ async def test_simple_chat_message_mapping( f"No message spans found. All spans: {[s.name for s in self.exporter.spans]}" ) - # Verify at least one span has versioned A365 format - found_versioned = False + # Verify at least one span has the structured array A365 format + found_structured = False for span in message_spans: attrs = dict(span.attributes or {}) raw_input = attrs.get(GEN_AI_INPUT_MESSAGES_KEY) if raw_input: input_data = json.loads(raw_input) - if isinstance(input_data, dict) and input_data.get("version") == "0.1.0": - found_versioned = True - messages = input_data["messages"] + if isinstance(input_data, list): + found_structured = True + messages = input_data roles = [m["role"] for m in messages] assert "user" in roles for msg in messages: @@ -227,12 +227,12 @@ async def test_simple_chat_message_mapping( raw_output = attrs.get(GEN_AI_OUTPUT_MESSAGES_KEY) if raw_output: output_data = json.loads(raw_output) - if isinstance(output_data, dict) and output_data.get("version") == "0.1.0": - out_messages = output_data["messages"] + if isinstance(output_data, list): + out_messages = output_data assert out_messages[0]["role"] == "assistant" assert any(p["type"] == "text" for p in out_messages[0]["parts"]) - assert found_versioned, "Expected at least one span with versioned A365 message format" + assert found_structured, "Expected at least one span with structured A365 message format" @pytest.mark.asyncio async def test_tool_call_message_mapping( @@ -274,8 +274,8 @@ async def test_tool_call_message_mapping( if not raw: continue data = json.loads(raw) - if isinstance(data, dict) and "messages" in data: - messages = data["messages"] + if isinstance(data, list): + messages = data for msg in messages: for part in msg.get("parts", []): part_types.add(part.get("type", "")) diff --git a/tests/observability/extensions/openai/test_message_mapper.py b/tests/observability/extensions/openai/test_message_mapper.py index 88ca83a8..a551bff7 100644 --- a/tests/observability/extensions/openai/test_message_mapper.py +++ b/tests/observability/extensions/openai/test_message_mapper.py @@ -24,11 +24,11 @@ def test_plain_string_wraps_as_user_message(self) -> None: result = map_input_messages("Hello world") assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert len(data["messages"]) == 1 - assert data["messages"][0]["role"] == "user" - assert data["messages"][0]["parts"][0]["type"] == "text" - assert data["messages"][0]["parts"][0]["content"] == "Hello world" + + assert len(data) == 1 + assert data[0]["role"] == "user" + assert data[0]["parts"][0]["type"] == "text" + assert data[0]["parts"][0]["content"] == "Hello world" def test_chat_completions_format(self) -> None: """Standard chat completions format with system + user messages.""" @@ -39,12 +39,12 @@ def test_chat_completions_format(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert len(data["messages"]) == 2 - assert data["messages"][0]["role"] == "system" - assert data["messages"][0]["parts"][0]["content"] == "You are helpful." - assert data["messages"][1]["role"] == "user" - assert data["messages"][1]["parts"][0]["content"] == "Hi there" + + assert len(data) == 2 + assert data[0]["role"] == "system" + assert data[0]["parts"][0]["content"] == "You are helpful." + assert data[1]["role"] == "user" + assert data[1]["parts"][0]["content"] == "Hi there" def test_chat_completions_with_tool_calls(self) -> None: """Messages with assistant tool_calls and tool response.""" @@ -65,24 +65,24 @@ def test_chat_completions_with_tool_calls(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert len(data["messages"]) == 3 + + assert len(data) == 3 # User message - assert data["messages"][0]["role"] == "user" - assert data["messages"][0]["parts"][0]["type"] == "text" + assert data[0]["role"] == "user" + assert data[0]["parts"][0]["type"] == "text" # Assistant with tool call - assert data["messages"][1]["role"] == "assistant" - assert data["messages"][1]["parts"][0]["type"] == "tool_call" - assert data["messages"][1]["parts"][0]["name"] == "add" - assert data["messages"][1]["parts"][0]["id"] == "call_123" + assert data[1]["role"] == "assistant" + assert data[1]["parts"][0]["type"] == "tool_call" + assert data[1]["parts"][0]["name"] == "add" + assert data[1]["parts"][0]["id"] == "call_123" # Tool response - assert data["messages"][2]["role"] == "tool" - assert data["messages"][2]["parts"][0]["type"] == "tool_call_response" - assert data["messages"][2]["parts"][0]["id"] == "call_123" - assert data["messages"][2]["parts"][0]["response"] == "4" + assert data[2]["role"] == "tool" + assert data[2]["parts"][0]["type"] == "tool_call_response" + assert data[2]["parts"][0]["id"] == "call_123" + assert data[2]["parts"][0]["response"] == "4" def test_response_input_item_param_format(self) -> None: """ResponseInputItemParam format with typed items.""" @@ -103,22 +103,22 @@ def test_response_input_item_param_format(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert len(data["messages"]) == 3 + + assert len(data) == 3 # Message - assert data["messages"][0]["role"] == "user" - assert data["messages"][0]["parts"][0]["type"] == "text" + assert data[0]["role"] == "user" + assert data[0]["parts"][0]["type"] == "text" # Function call - assert data["messages"][1]["role"] == "assistant" - assert data["messages"][1]["parts"][0]["type"] == "tool_call" - assert data["messages"][1]["parts"][0]["name"] == "get_weather" + assert data[1]["role"] == "assistant" + assert data[1]["parts"][0]["type"] == "tool_call" + assert data[1]["parts"][0]["name"] == "get_weather" # Function call output - assert data["messages"][2]["role"] == "tool" - assert data["messages"][2]["parts"][0]["type"] == "tool_call_response" - assert data["messages"][2]["parts"][0]["response"] == "Sunny, 22C" + assert data[2]["role"] == "tool" + assert data[2]["parts"][0]["type"] == "tool_call_response" + assert data[2]["parts"][0]["response"] == "Sunny, 22C" def test_message_without_type_field(self) -> None: """Messages without explicit 'type' field (EasyInputMessageParam).""" @@ -128,14 +128,14 @@ def test_message_without_type_field(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - assert data["messages"][0]["role"] == "user" + assert data[0]["role"] == "user" def test_invalid_json_wraps_as_plain_text(self) -> None: result = map_input_messages("not json {") assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert data["messages"][0]["parts"][0]["content"] == "not json {" + + assert data[0]["parts"][0]["content"] == "not json {" def test_empty_list_returns_none(self) -> None: assert map_input_messages("[]") is None @@ -151,9 +151,9 @@ def test_plain_string_wraps_as_assistant(self) -> None: result = map_output_messages("The answer is 42.") assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert data["messages"][0]["role"] == "assistant" - assert data["messages"][0]["parts"][0]["content"] == "The answer is 42." + + assert data[0]["role"] == "assistant" + assert data[0]["parts"][0]["content"] == "The answer is 42." def test_chat_completions_output(self) -> None: """Standard chat completions output with finish_reason.""" @@ -167,9 +167,9 @@ def test_chat_completions_output(self) -> None: result = map_output_messages(raw) assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert len(data["messages"]) == 1 - msg = data["messages"][0] + + assert len(data) == 1 + msg = data[0] assert msg["role"] == "assistant" assert msg["parts"][0]["type"] == "text" assert msg["parts"][0]["content"] == "Paris is the capital." @@ -193,7 +193,7 @@ def test_chat_completions_with_tool_calls(self) -> None: result = map_output_messages(raw) assert result is not None data = json.loads(result) - msg = data["messages"][0] + msg = data[0] assert msg["role"] == "assistant" assert msg["parts"][0]["type"] == "tool_call" assert msg["parts"][0]["name"] == "search" @@ -216,8 +216,8 @@ def test_response_json_format(self) -> None: result = map_output_messages(raw) assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - msg = data["messages"][0] + + msg = data[0] assert msg["role"] == "assistant" assert msg["parts"][0]["type"] == "text" assert msg["parts"][0]["content"] == "Hello!" @@ -239,7 +239,7 @@ def test_response_json_with_function_call(self) -> None: result = map_output_messages(raw) assert result is not None data = json.loads(result) - msg = data["messages"][0] + msg = data[0] assert msg["role"] == "assistant" assert msg["parts"][0]["type"] == "tool_call" assert msg["parts"][0]["name"] == "get_weather" @@ -257,5 +257,5 @@ def test_invalid_json_wraps_as_plain_text(self) -> None: result = map_output_messages("bad json") assert result is not None data = json.loads(result) - assert data["version"] == "0.1.0" - assert data["messages"][0]["role"] == "assistant" + + assert data[0]["role"] == "assistant" From c815198b57b29981920aa752f7df4f6ba652cef9 Mon Sep 17 00:00:00 2001 From: Nikhil Navakiran <211831449+nikhilNava@users.noreply.github.com> Date: Fri, 22 May 2026 11:15:11 -0600 Subject: [PATCH 2/4] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../observability/core/message_utils.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py b/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py index 16d25dd7..1a49d93a 100644 --- a/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py +++ b/libraries/microsoft-agents-a365-observability-core/microsoft_agents_a365/observability/core/message_utils.py @@ -131,16 +131,16 @@ def serialize_messages( messages = getattr(wrapper, "messages", []) count = len(messages) if isinstance(messages, list) else 0 noun = "message" if count == 1 else "messages" - fallback = [ - { - "role": MessageRole.SYSTEM.value, - "parts": [ - { - "type": "text", - "content": f"[serialization failed: {count} {noun}]", - } - ], - "finish_reason": "error", - } - ] + fallback_message = { + "role": MessageRole.SYSTEM.value, + "parts": [ + { + "type": "text", + "content": f"[serialization failed: {count} {noun}]", + } + ], + } + if isinstance(wrapper, OutputMessages): + fallback_message["finish_reason"] = "error" + fallback = [fallback_message] return json.dumps(fallback, ensure_ascii=False) From b39c5bb41d3da79d3d157c0b91c9562fb1bb3228 Mon Sep 17 00:00:00 2001 From: "Nikhil Chitlur Navakiran (from Dev Box)" Date: Fri, 22 May 2026 11:44:30 -0600 Subject: [PATCH 3/4] Fix W293 linting errors: remove whitespace from blank lines Remove trailing whitespace on 9 blank lines in tests/observability/extensions/openai/test_message_mapper.py Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../extensions/openai/test_message_mapper.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/observability/extensions/openai/test_message_mapper.py b/tests/observability/extensions/openai/test_message_mapper.py index a551bff7..8d0ea2df 100644 --- a/tests/observability/extensions/openai/test_message_mapper.py +++ b/tests/observability/extensions/openai/test_message_mapper.py @@ -24,7 +24,7 @@ def test_plain_string_wraps_as_user_message(self) -> None: result = map_input_messages("Hello world") assert result is not None data = json.loads(result) - + assert len(data) == 1 assert data[0]["role"] == "user" assert data[0]["parts"][0]["type"] == "text" @@ -39,7 +39,7 @@ def test_chat_completions_format(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - + assert len(data) == 2 assert data[0]["role"] == "system" assert data[0]["parts"][0]["content"] == "You are helpful." @@ -65,7 +65,7 @@ def test_chat_completions_with_tool_calls(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - + assert len(data) == 3 # User message @@ -103,7 +103,7 @@ def test_response_input_item_param_format(self) -> None: result = map_input_messages(raw) assert result is not None data = json.loads(result) - + assert len(data) == 3 # Message @@ -134,7 +134,7 @@ def test_invalid_json_wraps_as_plain_text(self) -> None: result = map_input_messages("not json {") assert result is not None data = json.loads(result) - + assert data[0]["parts"][0]["content"] == "not json {" def test_empty_list_returns_none(self) -> None: @@ -151,7 +151,7 @@ def test_plain_string_wraps_as_assistant(self) -> None: result = map_output_messages("The answer is 42.") assert result is not None data = json.loads(result) - + assert data[0]["role"] == "assistant" assert data[0]["parts"][0]["content"] == "The answer is 42." @@ -167,7 +167,7 @@ def test_chat_completions_output(self) -> None: result = map_output_messages(raw) assert result is not None data = json.loads(result) - + assert len(data) == 1 msg = data[0] assert msg["role"] == "assistant" @@ -216,7 +216,7 @@ def test_response_json_format(self) -> None: result = map_output_messages(raw) assert result is not None data = json.loads(result) - + msg = data[0] assert msg["role"] == "assistant" assert msg["parts"][0]["type"] == "text" @@ -257,5 +257,5 @@ def test_invalid_json_wraps_as_plain_text(self) -> None: result = map_output_messages("bad json") assert result is not None data = json.loads(result) - + assert data[0]["role"] == "assistant" From fac17193920b42ecc81ad4d5e9666768f021fec8 Mon Sep 17 00:00:00 2001 From: "Nikhil Chitlur Navakiran (from Dev Box)" Date: Fri, 22 May 2026 12:03:28 -0600 Subject: [PATCH 4/4] Fix ruff formatting in test_message_utils.py Collapse single-element lists onto one line per ruff format rules. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/observability/core/test_message_utils.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/observability/core/test_message_utils.py b/tests/observability/core/test_message_utils.py index 4cb2b2bd..2cdaa878 100644 --- a/tests/observability/core/test_message_utils.py +++ b/tests/observability/core/test_message_utils.py @@ -300,9 +300,7 @@ def test_output_message_custom_finish_reason(self): def test_serialized_output_includes_finish_reason(self): """finish_reason is always present in serialized output (required per OTel spec).""" wrapper = OutputMessages( - messages=[ - OutputMessage(role=MessageRole.ASSISTANT, parts=[TextPart(content="Hi")]) - ] + messages=[OutputMessage(role=MessageRole.ASSISTANT, parts=[TextPart(content="Hi")])] ) result = serialize_messages(wrapper) parsed = json.loads(result) @@ -434,9 +432,7 @@ def test_tool_call_message(self): messages=[ OutputMessage( role=MessageRole.ASSISTANT, - parts=[ - ToolCallRequestPart(name="search", id="c1", arguments={"q": "test"}) - ], + parts=[ToolCallRequestPart(name="search", id="c1", arguments={"q": "test"})], finish_reason=FinishReason.TOOL_CALL.value, ) ]